yyaml/parser/
loader.rs

1// Parser removed - using StateMachine directly
2use crate::error::{Marker, ScanError};
3
4use crate::events::{Event, EventReceiver, TScalarStyle, TokenType};
5use crate::linked_hash_map::LinkedHashMap;
6use crate::semantic::tags::schema::SchemaProcessor;
7use crate::semantic::tags::types::SchemaType;
8use crate::yaml::Yaml;
9use log::{debug, trace, warn};
10use std::char::decode_utf16;
11use std::collections::HashMap;
12
/// Character encodings a YAML byte stream can be decoded from.
///
/// The value is chosen from the stream's byte order mark; see `YamlLoader::load_from_bytes`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Encoding {
    /// UTF-8. Also the fallback when the stream carries no byte order mark.
    Utf8,
    /// UTF-16, little-endian (byte order mark `FF FE`).
    Utf16Le,
    /// UTF-16, big-endian (byte order mark `FE FF`).
    Utf16Be,
}
20
/// Byte order used when assembling 16-bit code units from a UTF-16 byte stream.
#[derive(Debug, Clone, Copy)]
enum Endian {
    /// Least-significant byte comes first in each pair.
    Little,
    /// Most-significant byte comes first in each pair.
    Big,
}
27
/// Public entry point of the loader.
///
/// A stateless unit type: all loading functions are associated functions that take the
/// input (text or bytes) and return the parsed documents as a `Vec<Yaml>`.
pub struct YamlLoader;
30
31impl YamlLoader {
32    /// Load YAML from string using Failsafe schema by default (all scalars as strings)
33    pub fn load_from_str(s: &str) -> Result<Vec<Yaml>, ScanError> {
34        Self::load_from_str_with_schema(s, SchemaType::Failsafe)
35    }
36
37    /// Load YAML from string with explicit schema selection
38    ///
39    /// # Schemas
40    /// - `Core`: Full YAML 1.2 with all standard types (null, bool, int, float, str, binary, timestamp, etc.)
41    /// - `Failsafe`: Minimal types only (all scalars treated as strings)
42    /// - `Json`: JSON-compatible subset
43    /// - `Custom`: User-defined types
44    pub fn load_from_str_with_schema(s: &str, schema: SchemaType) -> Result<Vec<Yaml>, ScanError> {
45        debug!(
46            "=== YamlLoader::load_from_str_with_schema ENTRY with: '{}', schema: {:?} ===",
47            s, schema
48        );
49
50        let mut schema_processor = SchemaProcessor::<'static>::new();
51        schema_processor.set_schema(schema);
52
53        // Fast path for simple cases - zero allocation, blazing fast
54        debug!("YamlLoader: trying fast parse");
55        match Self::try_fast_parse(s, &mut schema_processor) {
56            Ok(Some(result)) => {
57                debug!("Fast parser succeeded with: {result:?}");
58                return Ok(vec![result]);
59            }
60            Ok(None) => {
61                debug!("Fast parser detected complex syntax, falling back to full parser");
62                debug!("YamlLoader: fast parser returned None, falling back to StateMachine");
63            } // Fall through to full parser
64            Err(error) => {
65                debug!("Fast parser failed: {error:?}");
66                return Err(error);
67            } // Propagate parsing errors
68        }
69
70        // Handle multi-document streams
71        let mut documents = Vec::new();
72        debug!(
73            "YamlLoader: creating StateMachine with schema: {:?}",
74            schema
75        );
76        let mut state_machine = crate::parser::state_machine::StateMachine::new_with_processor(
77            s.chars(),
78            schema,
79            schema_processor,
80        );
81        debug!("YamlLoader: StateMachine created, starting document parsing loop");
82
83        // Process all documents in stream
84        while !state_machine.at_stream_end() {
85            debug!("YamlLoader: parsing next document...");
86            match state_machine.parse_next_document() {
87                Ok(Some(doc)) => {
88                    debug!("Parsed document: {doc:?}");
89                    documents.push(doc);
90                }
91                Ok(None) => break, // End of stream
92                Err(e) => {
93                    debug!("State machine failed: {e:?}");
94                    return Err(e);
95                }
96            }
97        }
98
99        // Handle empty streams (return empty vec, not error)
100        if documents.is_empty() {
101            debug!("No documents found in stream");
102            documents.push(Yaml::Null);
103        }
104
105        Ok(documents)
106    }
107
108    pub fn load_from_bytes(input: Vec<u8>) -> Result<Vec<Yaml>, ScanError> {
109        if input.is_empty() {
110            return Ok(vec![Yaml::Null]);
111        }
112
113        let mut bytes = input.as_slice();
114        let encoding = Self::detect_bom(&mut bytes)?;
115
116        let decoded = match encoding {
117            Encoding::Utf8 => {
118                // Already checked BOM, decode remaining
119                std::str::from_utf8(bytes)
120                    .map_err(|e| ScanError::EncodingError(format!("Invalid UTF-8: {}", e)))?
121                    .to_string()
122            }
123            Encoding::Utf16Le => Self::decode_utf16_bytes(bytes, Endian::Little)?,
124            Encoding::Utf16Be => Self::decode_utf16_bytes(bytes, Endian::Big)?,
125        };
126
127        // Now use existing parser
128        Self::load_from_str(&decoded)
129    }
130
131    fn detect_bom(bytes: &mut &[u8]) -> Result<Encoding, ScanError> {
132        if bytes.len() < 2 {
133            return Ok(Encoding::Utf8);
134        }
135
136        match bytes.get(0..3) {
137            Some(&[239, 187, 191]) => {
138                *bytes = &bytes[3..];
139                Ok(Encoding::Utf8)
140            }
141            _ if bytes[0] == 255 && bytes[1] == 254 => {
142                *bytes = &bytes[2..];
143                Ok(Encoding::Utf16Le)
144            }
145            _ if bytes[0] == 254 && bytes[1] == 255 => {
146                *bytes = &bytes[2..];
147                Ok(Encoding::Utf16Be)
148            }
149            _ => Ok(Encoding::Utf8), // Fallback
150        }
151    }
152
153    fn decode_utf16_bytes(bytes: &[u8], endian: Endian) -> Result<String, ScanError> {
154        if !bytes.len().is_multiple_of(2) {
155            return Err(ScanError::EncodingError(
156                "Invalid UTF-16: odd byte length".to_string(),
157            ));
158        }
159        let u16_iter = bytes.chunks_exact(2).map(|chunk| match endian {
160            Endian::Little => u16::from_le_bytes([chunk[0], chunk[1]]),
161            Endian::Big => u16::from_be_bytes([chunk[0], chunk[1]]),
162        });
163        decode_utf16(u16_iter)
164            .collect::<Result<String, _>>()
165            .map_err(|e| ScanError::EncodingError(format!("Invalid UTF-16: {}", e)))
166    }
167
168    /// Blazing-fast zero-allocation parser for common simple cases with production-grade error handling
169    /// Handles: "key: value", "- item", "[1, 2, 3]", "{key: value}", multi-line mappings, and simple scalars
170    fn try_fast_parse(
171        s: &str,
172        processor: &mut SchemaProcessor<'static>,
173    ) -> Result<Option<Yaml>, ScanError> {
174        debug!(
175            "try_fast_parse called with: '{}' (schema: {:?})",
176            s,
177            processor.current_schema()
178        );
179        let mut trimmed = s.trim();
180        debug!("try_fast_parse: trimmed = '{}'", trimmed);
181
182        // Strip BOM if present for accurate parsing decisions per YAML 1.2
183        if trimmed.starts_with('\u{feff}') {
184            trimmed = &trimmed[3..]; // BOM is 3 bytes in UTF-8
185        }
186
187        // Empty document
188        if trimmed.is_empty() {
189            return Ok(Some(Yaml::Null));
190        }
191
192        // CRITICAL FIX: If content starts with "- ", it's a sequence - ALWAYS use full parser
193        // The fast parser incorrectly handles complex sequences, so force full parser
194        if trimmed.starts_with("- ") {
195            return Ok(None);
196        }
197
198        // Simple scalar cases (no structure indicators)
199        if !trimmed.contains(':')
200            && !trimmed.contains('-')
201            && !trimmed.contains('[')
202            && !trimmed.contains('{')
203            && !trimmed.contains('|')
204            && !trimmed.contains('>')
205        {
206            return Self::resolve_plain_scalar(processor, trimmed).map(Some);
207        }
208
209        // YAML 1.2 Complete Feature Detection - Zero allocation, optimal performance
210        // Comprehensive spec compliance check using iterator chains for maximum efficiency
211
212        // Chapter 6.8: All directive detection (YAML, TAG, reserved)
213        let has_directives = trimmed.lines().any(|line| {
214            let trimmed_line = line.trim_start();
215            trimmed_line.starts_with("%YAML ")
216                || trimmed_line.starts_with("%TAG ")
217                || (trimmed_line.starts_with('%')
218                    && trimmed_line
219                        .chars()
220                        .nth(1)
221                        .is_some_and(|c| c.is_ascii_uppercase()))
222        });
223        if has_directives {
224            return Ok(None);
225        }
226
227        // Chapter 9.2: Multi-document stream detection - optimized counting
228        let mut doc_markers = 0u8;
229        let mut line_start = true;
230        for (i, &byte) in trimmed.as_bytes().iter().enumerate() {
231            match byte {
232                b'\n' => line_start = true,
233                b'-' if line_start => {
234                    if trimmed.as_bytes().get(i + 1) == Some(&b'-')
235                        && trimmed.as_bytes().get(i + 2) == Some(&b'-')
236                        && trimmed
237                            .as_bytes()
238                            .get(i + 3)
239                            .is_none_or(|&b| b == b' ' || b == b'\t' || b == b'\n')
240                    {
241                        doc_markers += 1;
242                        if doc_markers > 1 {
243                            return Ok(None);
244                        }
245                    }
246                    line_start = false;
247                }
248                b'.' if line_start => {
249                    if trimmed.as_bytes().get(i + 1) == Some(&b'.')
250                        && trimmed.as_bytes().get(i + 2) == Some(&b'.')
251                        && trimmed
252                            .as_bytes()
253                            .get(i + 3)
254                            .is_none_or(|&b| b == b' ' || b == b'\t' || b == b'\n')
255                    {
256                        return Ok(None); // Any document end marker requires full parser
257                    }
258                    line_start = false;
259                }
260                b' ' | b'\t' => {}
261                _ => line_start = false,
262            }
263        }
264
265        // Chapter 6.9: Node properties in mapping contexts - comprehensive detection
266        if trimmed.contains(':') {
267            let has_node_properties = trimmed.lines().any(|line| {
268                let trimmed_line = line.trim();
269                // Tag detection: ! not at start of line or after whitespace indicating tagged values
270                if let Some(exclaim_pos) = trimmed_line.find('!') {
271                    // Not a comment (!= case) and not negation (!something without space)
272                    let is_tag = exclaim_pos == 0
273                        || trimmed_line
274                            .chars()
275                            .nth(exclaim_pos.saturating_sub(1))
276                            .is_some_and(|c| c.is_whitespace())
277                        || trimmed_line[exclaim_pos..].starts_with("!!")
278                        || trimmed_line[exclaim_pos..]
279                            .chars()
280                            .nth(1)
281                            .is_some_and(|c| c.is_ascii_lowercase() || c == '<');
282                    if is_tag {
283                        return true;
284                    }
285                }
286                // Anchor detection: & followed by valid anchor characters
287                if let Some(amp_pos) = trimmed_line.find('&') {
288                    let is_anchor = trimmed_line[amp_pos + 1..]
289                        .chars()
290                        .next()
291                        .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
292                    if is_anchor {
293                        return true;
294                    }
295                }
296                // Alias detection: * followed by valid anchor characters
297                if let Some(star_pos) = trimmed_line.find('*') {
298                    let is_alias = trimmed_line[star_pos + 1..]
299                        .chars()
300                        .next()
301                        .is_some_and(|c| c.is_ascii_alphanumeric() || c == '_' || c == '-');
302                    if is_alias {
303                        return true;
304                    }
305                }
306                false
307            });
308            if has_node_properties {
309                return Ok(None);
310            }
311        }
312
313        // Chapter 8.2: Complex block mapping structures that exceed fast parser capabilities
314        if trimmed.contains(':') && trimmed.lines().count() > 1 {
315            // Detect explicit mapping indicators (?) requiring full parser
316            if trimmed.lines().any(|line| {
317                let trimmed_line = line.trim_start();
318                trimmed_line.starts_with("? ") || trimmed_line == "?"
319            }) {
320                return Ok(None);
321            }
322
323            // Detect flow collections embedded in block mappings
324            if trimmed.chars().any(|c| matches!(c, '[' | ']' | '{' | '}')) {
325                return Ok(None);
326            }
327
328            // Detect complex indentation patterns that require full parser
329            let mut prev_indent = None;
330            for line in trimmed.lines() {
331                if !line.trim().is_empty() && line.contains(':') {
332                    let indent = line.len() - line.trim_start().len();
333                    if let Some(prev) = prev_indent
334                        && indent != prev
335                        && indent != 0
336                    {
337                        return Ok(None); // Variable indentation requires full parser
338                    }
339                    prev_indent = Some(indent);
340                }
341            }
342        }
343
344        // Block sequence: handle lists with "- item" syntax (CHECK FIRST!)
345        // If it starts with "- ", it's likely a sequence - don't let block mapping claim it
346        if trimmed.starts_with("- ") {
347            // Try parsing as block sequence - let try_parse_block_sequence handle complexity
348            if Self::is_valid_block_sequence(trimmed) {
349                return Self::try_parse_block_sequence(trimmed, processor);
350            } else {
351                // Invalid structure - fall back to full parser instead of erroring
352                return Ok(None);
353            }
354        }
355
356        // Multi-line mapping: handle simple block mappings (ONLY if not a sequence)
357        // CRITICAL: Don't claim sequences that start with "- " as mappings!
358        if trimmed.contains(':') && trimmed.lines().count() > 1 && !trimmed.starts_with("- ") {
359            if let Some(result) = Self::try_parse_block_mapping(trimmed, processor) {
360                return Ok(Some(result));
361            } else {
362                // Complex mapping detected (anchors/aliases), fall back to full parser
363                return Ok(None);
364            }
365        }
366
367        // Single-line mapping: "key: value"
368        if trimmed.contains(':')
369            && trimmed.lines().count() == 1
370            && let Some(colon_pos) = trimmed.find(':')
371        {
372            let key_part = trimmed[..colon_pos].trim();
373            let value_part = trimmed[colon_pos + 1..].trim();
374
375            if !key_part.is_empty()
376                && !key_part.contains('[')
377                && !key_part.contains('{')
378                && !value_part.contains('[')
379                && !value_part.contains('{')
380                && !value_part.contains(':')
381                && !key_part.contains('&')
382                && !key_part.contains('*')
383                && !value_part.contains('&')
384                && !value_part.contains('*')
385            {
386                let mut hash = crate::linked_hash_map::LinkedHashMap::new();
387                let key = Yaml::String(key_part.to_string());
388                let value = if value_part.is_empty() {
389                    Yaml::Null
390                } else {
391                    Self::resolve_plain_scalar(processor, value_part)?
392                };
393                hash.insert(key, value);
394                return Ok(Some(Yaml::Hash(hash)));
395            }
396        }
397
398        // Simple array case: "[1, 2, 3]"
399        if trimmed.starts_with('[') && trimmed.ends_with(']') && trimmed.lines().count() == 1 {
400            return Self::try_parse_flow_sequence(trimmed, processor);
401        }
402
403        Ok(None)
404    }
405
406    /// Intelligent block sequence validation - zero allocation, blazing fast
407    /// Validates block sequence structure with support for nested content
408    #[inline]
409    fn is_valid_block_sequence(s: &str) -> bool {
410        let lines: Vec<&str> = s.lines().collect();
411        if lines.is_empty() {
412            return false;
413        }
414
415        let mut base_indent = None;
416        let mut in_sequence_item = false;
417        let mut item_indent = None;
418
419        for line in lines.iter() {
420            // Calculate indentation level
421            let trimmed = line.trim();
422            let indent_level = line.len() - line.trim_start().len();
423
424            // Skip empty lines and comments
425            if trimmed.is_empty() || trimmed.starts_with('#') {
426                continue;
427            }
428
429            if trimmed.starts_with("- ") {
430                // This is a sequence item marker
431                if base_indent.is_none() {
432                    base_indent = Some(indent_level);
433                } else if base_indent != Some(indent_level) {
434                    // Sequence items must be at same indentation level
435                    return false;
436                }
437                in_sequence_item = true;
438                item_indent = Some(indent_level + 2); // Content after "- " should be indented more
439            } else if in_sequence_item {
440                // This is content within a sequence item (nested mapping/sequence)
441                if let Some(expected_indent) = item_indent
442                    && indent_level < expected_indent
443                {
444                    // Content must be indented more than sequence marker
445                    return false;
446                }
447                // Allow nested content within sequence items
448            } else {
449                // First line should be a sequence item, or we're not in a valid sequence
450                return false;
451            }
452        }
453
454        // Must have encountered at least one sequence item
455        base_indent.is_some()
456    }
457
458    /// Parse simple block mapping format: key: value on separate lines
459    fn try_parse_block_mapping(s: &str, processor: &mut SchemaProcessor<'static>) -> Option<Yaml> {
460        let mut map = crate::linked_hash_map::LinkedHashMap::new();
461
462        // First pass: check for nested indented content - if found, fall back to full parser
463        let lines: Vec<&str> = s.lines().collect();
464        for (i, line) in lines.iter().enumerate() {
465            let line = line.trim();
466            if line.is_empty() || line.starts_with('#') {
467                continue;
468            }
469
470            // If this line has a colon with empty value, check if next non-empty line is indented
471            if let Some(colon_pos) = line.find(':') {
472                let value_part = line[colon_pos + 1..].trim();
473                if value_part.is_empty() && i + 1 < lines.len() {
474                    // Check if next non-empty line is indented (nested content)
475                    for next_line in &lines[i + 1..] {
476                        if next_line.trim().is_empty() || next_line.trim().starts_with('#') {
477                            continue;
478                        }
479                        let next_indent = next_line.len() - next_line.trim_start().len();
480                        let current_indent = lines[i].len() - lines[i].trim_start().len();
481                        if next_indent > current_indent {
482                            return None;
483                        }
484                        break;
485                    }
486                }
487            }
488        }
489
490        for line in s.lines() {
491            let trimmed = line.trim();
492            if trimmed.is_empty() || trimmed.starts_with('#') {
493                continue;
494            }
495            if let Some(colon_pos) = trimmed.find(':') {
496                let key = trimmed[..colon_pos].trim();
497                let value = trimmed[colon_pos + 1..].trim();
498                if key.is_empty() {
499                    return None;
500                }
501                let yaml_value = if value.is_empty() {
502                    Yaml::Null
503                } else {
504                    Self::resolve_plain_scalar(processor, value).ok()?
505                };
506                map.insert(Yaml::String(key.to_string()), yaml_value);
507            } else {
508                return None;
509            }
510        }
511
512        Some(Yaml::Hash(map))
513    }
514
515    fn try_parse_block_sequence(
516        s: &str,
517        processor: &mut SchemaProcessor<'static>,
518    ) -> Result<Option<Yaml>, ScanError> {
519        let mut items = Vec::new();
520        let mut lines_iter = s.lines().enumerate();
521
522        // Pre-allocate with estimated capacity for better performance
523        if s.len() > 100 {
524            items.reserve(s.len() / 50); // Rough estimate: 50 chars per item
525        }
526
527        while let Some((line_num, line)) = lines_iter.next() {
528            let trimmed = line.trim();
529
530            // Skip empty lines and comments - zero allocation fast path
531            if trimmed.is_empty() || trimmed.starts_with('#') {
532                continue;
533            }
534
535            if let Some(first_line_content) = trimmed.strip_prefix("- ") {
536                // Found a sequence item - collect all lines that belong to this item
537                let base_indent = line.len() - line.trim_start().len();
538                let item_content_indent = base_indent + 2; // Content after "- " should be more indented
539
540                // Zero-allocation parsing: work with string slices directly
541                // Remove "- " prefix
542                let first_content_trimmed = first_line_content.trim();
543
544                // Determine item boundaries without collecting into Vec
545                let _item_start_pos = if first_content_trimmed.is_empty() {
546                    None
547                } else {
548                    Some((first_content_trimmed, line_num, base_indent))
549                };
550
551                let mut item_end_line = line_num;
552                let mut has_multiline_content = false;
553
554                // Peek ahead to find item boundaries - zero allocation approach
555                let mut line_offset = 1;
556                let mut next_item_start = None;
557
558                for next_line in s.lines().skip(line_num + 1) {
559                    let actual_line_num = line_num + line_offset;
560                    let next_trimmed = next_line.trim();
561                    let next_indent = next_line.len() - next_line.trim_start().len();
562
563                    // Skip empty lines and comments
564                    if next_trimmed.is_empty() || next_trimmed.starts_with('#') {
565                        line_offset += 1;
566                        continue;
567                    }
568
569                    // If this line starts a new sequence item, stop collecting
570                    if next_trimmed.starts_with("- ") && next_indent == base_indent {
571                        next_item_start = Some(actual_line_num);
572                        break;
573                    }
574
575                    // If this line is at or less indented than expected content, stop collecting
576                    if next_indent < item_content_indent {
577                        break;
578                    }
579
580                    // This line belongs to the current sequence item
581                    item_end_line = actual_line_num;
582                    has_multiline_content = true;
583                    line_offset += 1;
584                }
585
586                // Parse item content with zero-allocation approach
587                let item = if !has_multiline_content {
588                    // Single line item - parse directly without allocation
589                    if first_content_trimmed.is_empty() {
590                        Ok(Yaml::Null)
591                    } else {
592                        Self::parse_item_content(first_content_trimmed, processor)
593                    }
594                } else {
595                    // Multi-line item - extract slice and parse
596                    let item_lines: Vec<&str> = s
597                        .lines()
598                        .skip(line_num)
599                        .take(item_end_line - line_num + 1)
600                        .collect();
601
602                    let mut content_parts = Vec::new();
603
604                    // Add first line content if not empty
605                    if !first_content_trimmed.is_empty() {
606                        content_parts.push(first_content_trimmed);
607                    }
608
609                    // Add subsequent lines with normalized indentation
610                    for item_line in item_lines.iter().skip(1) {
611                        let item_trimmed = item_line.trim();
612                        if item_trimmed.is_empty() || item_trimmed.starts_with('#') {
613                            continue;
614                        }
615
616                        let item_indent = item_line.len() - item_line.trim_start().len();
617                        let normalized_line = if item_indent >= item_content_indent {
618                            &item_line[item_content_indent.min(item_line.len())..]
619                        } else {
620                            item_line
621                        };
622                        content_parts.push(normalized_line);
623                    }
624
625                    if content_parts.is_empty() {
626                        Ok(Yaml::Null)
627                    } else if content_parts.len() == 1 {
628                        Self::parse_item_content(content_parts[0], processor)
629                    } else {
630                        // Only allocate string when absolutely necessary
631                        let joined_content = content_parts.join("\n");
632                        Self::parse_item_content(&joined_content, processor)
633                    }
634                };
635
636                // Handle parsing errors
637                let parsed_item = item?;
638
639                items.push(parsed_item);
640
641                // Skip lines we've already processed
642                if let Some(next_start) = next_item_start {
643                    // Fast-forward iterator to next item
644                    for (current_line_num, _) in lines_iter.by_ref() {
645                        if current_line_num + 1 >= next_start {
646                            break;
647                        }
648                    }
649                } else {
650                    // Skip to end of current item
651                    for _ in line_num..item_end_line {
652                        lines_iter.next();
653                    }
654                }
655            } else {
656                // Unexpected line that doesn't start with "- " at the expected level
657                return Err(ScanError::new(
658                    Marker {
659                        index: 0,
660                        line: line_num + 1,
661                        col: 0,
662                    },
663                    &format!(
664                        "invalid block sequence: expected '- ' at line {}, found '{}'",
665                        line_num + 1,
666                        trimmed
667                    ),
668                ));
669            }
670        }
671
672        if items.is_empty() {
673            Ok(None)
674        } else {
675            Ok(Some(Yaml::Array(items)))
676        }
677    }
678
679    /// Parse content within a sequence item - handles scalars, mappings, and nested sequences
680    /// Returns errors for malformed nested content
681    #[inline]
682    fn parse_item_content(
683        content: &str,
684        processor: &mut SchemaProcessor<'static>,
685    ) -> Result<Yaml, ScanError> {
686        let trimmed = content.trim();
687        if trimmed.is_empty() {
688            return Ok(Yaml::Null);
689        }
690
691        // Use direct scalar parsing to avoid infinite recursion
692        // (parse_item_content is called from try_fast_parse, so we can't call try_fast_parse again)
693
694        // For complex content, use scalar parsing as fallback
695        // This maintains compatibility while allowing nested structures
696        Self::resolve_plain_scalar(processor, trimmed)
697    }
698
699    fn try_parse_flow_sequence(
700        s: &str,
701        processor: &mut SchemaProcessor<'static>,
702    ) -> Result<Option<Yaml>, ScanError> {
703        // Strip brackets
704        let inner = &s[1..s.len() - 1].trim();
705        if inner.is_empty() {
706            return Ok(Some(Yaml::Array(Vec::new())));
707        }
708
709        let mut items = Vec::new();
710        for item in inner.split(',') {
711            items.push(Self::resolve_plain_scalar(processor, item.trim())?);
712        }
713        Ok(Some(Yaml::Array(items)))
714    }
715
716    /// Direct scalar parsing with schema-aware type inference
717    /// Uses SchemaProcessor for proper YAML 1.2 schema compliance
718    fn resolve_plain_scalar(
719        processor: &mut SchemaProcessor<'static>,
720        raw: &str,
721    ) -> Result<Yaml, ScanError> {
722        let trimmed = raw.trim();
723        let marker = Marker {
724            index: 0,
725            line: 1,
726            col: 1,
727        };
728
729        Self::convert_plain_scalar(processor, trimmed, marker)
730    }
731
732    fn convert_plain_scalar(
733        processor: &mut SchemaProcessor<'static>,
734        trimmed: &str,
735        marker: Marker,
736    ) -> Result<Yaml, ScanError> {
737        use crate::semantic::tags::types::YamlType;
738
739        if trimmed.len() >= 2
740            && ((trimmed.starts_with('"') && trimmed.ends_with('"'))
741                || (trimmed.starts_with('\'') && trimmed.ends_with('\'')))
742        {
743            return Ok(Yaml::String(trimmed[1..trimmed.len() - 1].to_string()));
744        }
745
746        match processor.infer_scalar_type(trimmed) {
747            YamlType::Null => Ok(Yaml::Null),
748            YamlType::Bool => match processor.current_schema() {
749                SchemaType::Json => match trimmed {
750                    "true" => Ok(Yaml::Boolean(true)),
751                    "false" => Ok(Yaml::Boolean(false)),
752                    _ => Err(ScanError::new(
753                        marker,
754                        &format!(
755                            "Scalar '{trimmed}' is not a canonical JSON boolean (expected 'true' or 'false')"
756                        ),
757                    )),
758                },
759                _ => match trimmed.to_ascii_lowercase().as_str() {
760                    "true" | "yes" | "on" => Ok(Yaml::Boolean(true)),
761                    "false" | "no" | "off" => Ok(Yaml::Boolean(false)),
762                    _ => Ok(Yaml::String(trimmed.to_string())),
763                },
764            },
765            YamlType::Int => {
766                if processor.is_integer_pattern(trimmed) {
767                    trimmed
768                        .parse::<i64>()
769                        .map(Yaml::Integer)
770                        .map_err(|_| {
771                            ScanError::new(
772                                marker,
773                                &format!("Scalar '{trimmed}' is not a canonical JSON integer"),
774                            )
775                        })
776                } else {
777                    Err(ScanError::new(
778                        marker,
779                        &format!("Scalar '{trimmed}' is not a canonical JSON integer"),
780                    ))
781                }
782            }
783            YamlType::Float => match processor.current_schema() {
784                SchemaType::Json => {
785                    if processor.is_float_pattern(trimmed) {
786                        trimmed
787                            .parse::<f64>()
788                            .map(|f| Yaml::Real(f.to_string()))
789                            .map_err(|_| {
790                                ScanError::new(
791                                    marker,
792                                    &format!(
793                                        "Scalar '{trimmed}' is not a canonical JSON number"
794                                    ),
795                                )
796                            })
797                    } else {
798                        Err(ScanError::new(
799                            marker,
800                            &format!("Scalar '{trimmed}' is not a canonical JSON number"),
801                        ))
802                    }
803                }
804                _ => match trimmed.to_ascii_lowercase().as_str() {
805                    ".inf" | "+.inf" => Ok(Yaml::Real("+.inf".to_string())),
806                    "-.inf" => Ok(Yaml::Real("-.inf".to_string())),
807                    ".nan" => Ok(Yaml::Real(".nan".to_string())),
808                    _ => trimmed
809                        .parse::<f64>()
810                        .map(|f| Yaml::Real(f.to_string()))
811                        .map_err(|_| {
812                            ScanError::new(
813                                marker,
814                                &format!("Scalar '{trimmed}' is not a valid YAML float"),
815                            )
816                        }),
817                },
818            },
819            YamlType::Str
820            | YamlType::Unknown
821            | YamlType::Custom(_)
822            | YamlType::Binary
823            | YamlType::Timestamp
824            | YamlType::Seq
825            | YamlType::Map
826            | YamlType::Pairs
827            | YamlType::Set
828            | YamlType::Omap
829            | YamlType::Merge
830            | YamlType::Value => Ok(Yaml::String(trimmed.to_string())),
831        }
832    }
833}
834
835/// The data structure that builds `Yaml` AST from parser events
836pub struct YamlReceiver {
837    pub docs: Vec<Yaml>,
838    doc_stack: Vec<(Yaml, usize)>,
839    key_stack: Vec<Yaml>,
840    anchors: HashMap<usize, Yaml>,
841    // Simple circular reference detection
842    resolution_stack: Vec<usize>,
843    // Billion laughs protection
844    alias_count: usize,
845    // Schema processor for scalar type inference
846    schema_processor: SchemaProcessor<'static>,
847    error: Option<ScanError>,
848}
849
850impl Default for YamlReceiver {
851    fn default() -> Self {
852        Self::new()
853    }
854}
855
856impl YamlReceiver {
857    #[must_use]
858    pub fn new() -> Self {
859        Self::new_with_schema(SchemaType::Core)
860    }
861
862    #[must_use]
863    pub fn new_with_schema(schema: SchemaType) -> Self {
864        let mut processor = SchemaProcessor::<'static>::new();
865        processor.set_schema(schema);
866        Self {
867            docs: Vec::with_capacity(1),         // Most YAML files have 1 document
868            doc_stack: Vec::with_capacity(8),    // Typical nesting depth
869            key_stack: Vec::with_capacity(8),    // Typical mapping depth
870            anchors: HashMap::with_capacity(16), // Reasonable anchor count
871            resolution_stack: Vec::with_capacity(8), // Rare deep circular refs
872            alias_count: 0,                      // Start with no aliases processed
873            schema_processor: processor,
874            error: None,
875        }
876    }
877
878    #[inline]
879    fn insert_new_node(&mut self, (node, aid): (Yaml, usize)) {
880        if self.error.is_some() {
881            return;
882        }
883        // store anchor if needed - blazing-fast HashMap operations
884        if aid > 0 {
885            self.anchors.insert(aid, node.clone());
886        }
887        if self.doc_stack.is_empty() {
888            self.doc_stack.push((node, 0));
889        } else if let Some(top) = self.doc_stack.last_mut() {
890            match top.0 {
891                Yaml::Array(ref mut arr) => arr.push(node),
892                Yaml::Hash(ref mut h) => {
893                    if let Some(cur_key) = self.key_stack.last_mut() {
894                        if cur_key.is_badvalue() {
895                            *cur_key = node;
896                        } else {
897                            let mut swap_key = Yaml::BadValue;
898                            std::mem::swap(&mut swap_key, cur_key);
899                            h.insert(swap_key, node);
900                        }
901                    }
902                }
903                _ => {}
904            }
905        }
906    }
907
908    /// Blazing-fast alias resolution with circular reference protection
909    #[inline]
910    fn resolve_alias(&mut self, id: usize) -> Yaml {
911        // Billion laughs protection - limit total alias resolutions
912        self.alias_count += 1;
913        if self.alias_count > 1000 {
914            warn!(
915                "Alias count exceeded limit ({}), potential billion laughs attack",
916                self.alias_count
917            );
918            return Yaml::Null;
919        }
920
921        // Fast circular reference check - O(n) but n is typically very small (< 10 deep)
922        if self.resolution_stack.contains(&id) {
923            warn!(
924                "Circular reference detected for alias ID {}, breaking cycle",
925                id
926            );
927            return Yaml::Null;
928        }
929
930        // Look up the anchored value and return it immediately
931        if let Some(anchored_node) = self.anchors.get(&id).cloned() {
932            anchored_node
933        } else {
934            warn!("Anchor ID {} not found, returning null", id);
935            Yaml::Null
936        }
937    }
938
939    /// Reset alias tracking state (called between documents)
940    #[inline]
941    fn reset_alias_tracking(&mut self) {
942        self.resolution_stack.clear();
943        self.alias_count = 0;
944    }
945}
946
947impl EventReceiver for YamlReceiver {
948    fn on_event(&mut self, ev: Event) {
949        if self.error.is_some() {
950            return;
951        }
952        trace!(
953            "YAML EVENT: {:?} (doc_stack len: {}, docs len: {})",
954            ev,
955            self.doc_stack.len(),
956            self.docs.len()
957        );
958        match ev {
959            Event::DocumentStart => {
960                // Reset alias tracking for each new document
961                self.reset_alias_tracking();
962            }
963            Event::DocumentEnd => match self.doc_stack.len() {
964                0 => self.docs.push(Yaml::BadValue),
965                1 => {
966                    if let Some((doc, _)) = self.doc_stack.pop() {
967                        self.docs.push(doc);
968                    }
969                }
970                _ => {}
971            },
972            Event::StreamStart => {}
973            Event::StreamEnd => {}
974            Event::Alias(id) => {
975                let node = self.resolve_alias(id);
976                self.insert_new_node((node, 0));
977            }
978            Event::Scalar(s, style, aid, tag) => {
979                let node = if style != TScalarStyle::Plain {
980                    Yaml::String(s)
981                } else if let Some(TokenType::Tag(ref handle, ref suffix)) = tag {
982                    // handle tag
983                    if handle == "!!" {
984                        match suffix.as_str() {
985                            "bool" => match s.parse::<bool>() {
986                                Ok(b) => Yaml::Boolean(b),
987                                Err(_) => Yaml::BadValue,
988                            },
989                            "int" => match s.parse::<i64>() {
990                                Ok(i) => Yaml::Integer(i),
991                                Err(_) => Yaml::BadValue,
992                            },
993                            "float" => match s.parse::<f64>() {
994                                Ok(_) => Yaml::Real(s),
995                                Err(_) => Yaml::BadValue,
996                            },
997                            "null" => {
998                                if s == "~" || s == "null" {
999                                    Yaml::Null
1000                                } else {
1001                                    Yaml::BadValue
1002                                }
1003                            }
1004                            _ => Yaml::String(s),
1005                        }
1006                    } else {
1007                        // Preserve custom tag by creating a Tagged variant
1008                        let tag_name = if handle.is_empty() {
1009                            suffix.clone()
1010                        } else {
1011                            format!("{}{}", handle, suffix)
1012                        };
1013                        match YamlLoader::resolve_plain_scalar(&mut self.schema_processor, &s) {
1014                            Ok(value) => Yaml::Tagged(tag_name, Box::new(value)),
1015                            Err(err) => {
1016                                self.error = Some(err);
1017                                Yaml::Null
1018                            }
1019                        }
1020                    }
1021                } else {
1022                    // autodetect
1023                    match YamlLoader::resolve_plain_scalar(&mut self.schema_processor, &s) {
1024                        Ok(value) => value,
1025                        Err(err) => {
1026                            self.error = Some(err);
1027                            Yaml::Null
1028                        }
1029                    }
1030                };
1031                self.insert_new_node((node, aid));
1032            }
1033            Event::SequenceStart(aid) => {
1034                self.doc_stack.push((Yaml::Array(Vec::new()), aid));
1035            }
1036            Event::SequenceEnd => {
1037                if let Some(top) = self.doc_stack.pop() {
1038                    self.insert_new_node(top);
1039                }
1040            }
1041            Event::MappingStart(aid) => {
1042                let h = LinkedHashMap::new();
1043                self.doc_stack.push((Yaml::Hash(h), aid));
1044                self.key_stack.push(Yaml::BadValue);
1045            }
1046            Event::MappingEnd => {
1047                self.key_stack.pop();
1048                if let Some(top) = self.doc_stack.pop() {
1049                    self.insert_new_node(top);
1050                }
1051            }
1052            Event::YamlDirective(_major, _minor) => {
1053                // Store YAML version directive for document processing
1054                // This is handled at the state machine level, no action needed here
1055            }
1056            Event::TagDirective(_handle, _prefix) => {
1057                // Store TAG directive for document processing
1058                // This is handled at the state machine level, no action needed here
1059            }
1060            Event::Nothing => {}
1061        }
1062    }
1063}
1064
1065impl YamlReceiver {
1066    pub fn into_result(self) -> Result<Vec<Yaml>, ScanError> {
1067        if let Some(err) = self.error {
1068            Err(err)
1069        } else {
1070            Ok(self.docs)
1071        }
1072    }
1073}
1074
1075// Old load function removed - StateMachine::parse() handles loading directly
1076/*
1077pub fn load<T: Iterator<Item = char>, R: MarkedEventReceiver>(
1078    parser: &mut Parser<T>,
1079    recv: &mut R,
1080    multi: bool,
1081) -> Result<(), ScanError> {
1082    // ZERO-ALLOCATION, NON-RECURSIVE LOADER USING EXPLICIT STACK
1083    // Uses Vec<ContainerType> to track nesting instead of recursion
1084    #[derive(Debug, Clone, Copy)]
1085    enum ContainerType {
1086        Sequence,
1087        Mapping,
1088    }
1089
1090    let mut nesting_stack: Vec<ContainerType> = Vec::with_capacity(32); // Pre-allocate for performance
1091    let mut documents_processed = 0;
1092    let mut in_document = false;
1093
1094    // Ensure stream has started
1095    if !parser.scanner.stream_started() {
1096        let (ev, mark) = parser.next()?;
1097        if ev != Event::StreamStart {
1098            return Err(ScanError::new(
1099                mark,
1100                &format!("Expected StreamStart event, got {ev:?}")
1101            ));
1102        }
1103        recv.on_event(ev, mark);
1104    }
1105
1106    if parser.scanner.stream_ended() {
1107        recv.on_event(Event::StreamEnd, parser.scanner.mark());
1108        return Ok(());
1109    }
1110
1111    // FLAT EVENT PROCESSING LOOP - ZERO RECURSION
1112    loop {
1113        let (ev, mark) = parser.next()?;
1114
1115        match ev {
1116            Event::StreamEnd => {
1117                if in_document {
1118                    recv.on_event(Event::DocumentEnd, mark);
1119                }
1120                recv.on_event(ev, mark);
1121                break;
1122            }
1123
1124            Event::DocumentStart => {
1125                if in_document && multi {
1126                    recv.on_event(Event::DocumentEnd, mark);
1127                }
1128                parser.anchors.clear();
1129                recv.on_event(ev, mark);
1130                in_document = true;
1131                documents_processed += 1;
1132                if !multi && documents_processed > 1 {
1133                    // Single document mode: ignore additional documents
1134                    continue;
1135                }
1136            }
1137
1138            Event::DocumentEnd => {
1139                recv.on_event(ev, mark);
1140                in_document = false;
1141            }
1142
1143            Event::SequenceStart(_) => {
1144                if !in_document {
1145                    // Implicit document start
1146                    parser.anchors.clear();
1147                    recv.on_event(Event::DocumentStart, mark);
1148                    in_document = true;
1149                    documents_processed += 1;
1150                }
1151                recv.on_event(ev, mark);
1152                nesting_stack.push(ContainerType::Sequence);
1153            }
1154
1155            Event::SequenceEnd => {
1156                recv.on_event(ev, mark);
1157                if let Some(ContainerType::Sequence) = nesting_stack.pop() {
1158                    // Correct nesting
1159                } else {
1160                    return Err(ScanError::new(
1161                        mark,
1162                        "Unexpected SequenceEnd: not inside sequence"
1163                    ));
1164                }
1165            }
1166
1167            Event::MappingStart(_) => {
1168                if !in_document {
1169                    // Implicit document start
1170                    parser.anchors.clear();
1171                    recv.on_event(Event::DocumentStart, mark);
1172                    in_document = true;
1173                    documents_processed += 1;
1174                }
1175                recv.on_event(ev, mark);
1176                nesting_stack.push(ContainerType::Mapping);
1177            }
1178
1179            Event::MappingEnd => {
1180                recv.on_event(ev, mark);
1181                if let Some(ContainerType::Mapping) = nesting_stack.pop() {
1182                    // Correct nesting
1183                } else {
1184                    return Err(ScanError::new(
1185                        mark,
1186                        "Unexpected MappingEnd: not inside mapping"
1187                    ));
1188                }
1189            }
1190
1191            Event::Scalar(..) | Event::Alias(..) => {
1192                if !in_document {
1193                    // Implicit document start
1194                    parser.anchors.clear();
1195                    recv.on_event(Event::DocumentStart, mark);
1196                    in_document = true;
1197                    documents_processed += 1;
1198                }
1199                recv.on_event(ev, mark);
1200            }
1201
1202            _ => {
1203                // Handle any other events directly
1204                if !in_document {
1205                    // Implicit document start
1206                    parser.anchors.clear();
1207                    recv.on_event(Event::DocumentStart, mark);
1208                    in_document = true;
1209                    documents_processed += 1;
1210                }
1211                recv.on_event(ev, mark);
1212            }
1213        }
1214
1215        // Single document mode: break after processing first document
1216        if !multi && documents_processed >= 1 && nesting_stack.is_empty() && in_document {
1217            // Continue to find StreamEnd
1218            loop {
1219                let (next_ev, next_mark) = parser.next()?;
1220                if matches!(next_ev, Event::StreamEnd) {
1221                    recv.on_event(Event::DocumentEnd, next_mark);
1222                    recv.on_event(next_ev, next_mark);
1223                    break;
1224                }
1225                // Skip other events in single document mode
1226            }
1227            break;
1228        }
1229    }
1230
1231    // Verify all containers were properly closed
1232    if !nesting_stack.is_empty() {
1233        return Err(ScanError::new(
1234            parser.scanner.mark(),
1235            &format!("Unclosed containers at end of stream: {} remaining", nesting_stack.len())
1236        ));
1237    }
1238
1239    Ok(())
1240}
1241*/
1242
1243// REMOVED: load_document function - replaced with flat, non-recursive loader
1244// This function was causing stack overflow via recursive calls to load_node
1245
1246// REMOVED: load_node function - replaced with flat, non-recursive loader
1247// This function was causing infinite recursion via load_sequence/load_mapping calls
1248
1249// REMOVED: load_sequence function - replaced with flat, non-recursive loader
1250// This function was causing infinite recursion via load_node calls
1251
1252// REMOVED: load_mapping function - replaced with flat, non-recursive loader
1253// This function was causing infinite recursion via load_node calls