rust_yaml/scanner/
token_processor.rs

1//! Token processing and generation for YAML scanner
2
3use super::{QuoteStyle, Token, TokenType};
4use crate::Position;
5
/// Token generation helper functions.
///
/// A unit struct used purely as a namespace: every helper in its `impl` is an
/// associated function (no `self`) that builds and returns a [`Token`].
pub(super) struct TokenProcessor;
8
9impl TokenProcessor {
10    /// Create a simple token without data
11    pub fn simple_token(token_type: TokenType, position: Position) -> Token {
12        Token::simple(token_type, position)
13    }
14
15    /// Create a scalar token
16    pub fn scalar_token(value: String, quote_style: QuoteStyle, position: Position) -> Token {
17        Token::new(TokenType::Scalar(value, quote_style), position, position)
18    }
19
20    /// Create an anchor token
21    pub fn anchor_token(name: String, position: Position) -> Token {
22        Token::new(TokenType::Anchor(name), position, position)
23    }
24
25    /// Create an alias token
26    pub fn alias_token(name: String, position: Position) -> Token {
27        Token::new(TokenType::Alias(name), position, position)
28    }
29
30    /// Create a tag token
31    pub fn tag_token(tag: String, position: Position) -> Token {
32        Token::new(TokenType::Tag(tag), position, position)
33    }
34
35    /// Create a comment token
36    pub fn comment_token(comment: String, position: Position) -> Token {
37        Token::new(TokenType::Comment(comment), position, position)
38    }
39
40    /// Create a literal block scalar token
41    pub fn literal_block_token(value: String, position: Position) -> Token {
42        Token::new(TokenType::BlockScalarLiteral(value), position, position)
43    }
44
45    /// Create a folded block scalar token
46    pub fn folded_block_token(value: String, position: Position) -> Token {
47        Token::new(TokenType::BlockScalarFolded(value), position, position)
48    }
49}
50
/// Character classification helpers.
///
/// A unit struct used purely as a namespace: every helper in its `impl` is an
/// associated function taking a single `char` and returning `bool`.
pub(super) struct CharClassifier;
53
54impl CharClassifier {
55    /// Check if character is a flow indicator
56    pub fn is_flow_indicator(ch: char) -> bool {
57        matches!(ch, '[' | ']' | '{' | '}' | ',' | ':')
58    }
59
60    /// Check if character can start an identifier
61    pub fn is_identifier_start(ch: char) -> bool {
62        ch.is_ascii_alphabetic() || ch == '_'
63    }
64
65    /// Check if character can be in an identifier
66    pub fn is_identifier_char(ch: char) -> bool {
67        ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_')
68    }
69
70    /// Check if character is a document indicator
71    pub fn is_document_indicator(ch: char) -> bool {
72        matches!(ch, '-' | '.')
73    }
74
75    /// Check if character is whitespace (YAML definition)
76    pub fn is_yaml_whitespace(ch: char) -> bool {
77        matches!(ch, ' ' | '\t')
78    }
79
80    /// Check if character is a line break
81    pub fn is_line_break(ch: char) -> bool {
82        matches!(ch, '\n' | '\r')
83    }
84
85    /// Check if character is printable ASCII
86    pub fn is_printable_ascii(ch: char) -> bool {
87        ch.is_ascii() && !ch.is_ascii_control() || ch == '\t'
88    }
89
90    /// Check if character is a digit
91    pub fn is_digit(ch: char) -> bool {
92        ch.is_ascii_digit()
93    }
94
95    /// Check if character is hex digit
96    pub fn is_hex_digit(ch: char) -> bool {
97        ch.is_ascii_hexdigit()
98    }
99
100    /// Check if character is octal digit
101    pub fn is_octal_digit(ch: char) -> bool {
102        matches!(ch, '0'..='7')
103    }
104}
105
/// Pattern matching helpers.
///
/// A unit struct used purely as a namespace: every helper in its `impl` is an
/// associated function (no `self`) that returns `bool`.
pub(super) struct PatternMatcher;
108
109impl PatternMatcher {
110    /// Check for document start pattern (---)
111    pub fn is_document_start(input: &str, pos: usize) -> bool {
112        let chars: Vec<char> = input.chars().collect();
113        if pos + 2 >= chars.len() {
114            return false;
115        }
116
117        chars[pos] == '-'
118            && chars[pos + 1] == '-'
119            && chars[pos + 2] == '-'
120            && (pos + 3 >= chars.len()
121                || CharClassifier::is_yaml_whitespace(chars[pos + 3])
122                || CharClassifier::is_line_break(chars[pos + 3]))
123    }
124
125    /// Check for document end pattern (...)
126    pub fn is_document_end(input: &str, pos: usize) -> bool {
127        let chars: Vec<char> = input.chars().collect();
128        if pos + 2 >= chars.len() {
129            return false;
130        }
131
132        chars[pos] == '.'
133            && chars[pos + 1] == '.'
134            && chars[pos + 2] == '.'
135            && (pos + 3 >= chars.len()
136                || CharClassifier::is_yaml_whitespace(chars[pos + 3])
137                || CharClassifier::is_line_break(chars[pos + 3]))
138    }
139
140    /// Check if we're at the start of a tag
141    pub fn is_tag_start(ch: char) -> bool {
142        ch == '!'
143    }
144
145    /// Check if we're at the start of an anchor
146    pub fn is_anchor_start(ch: char) -> bool {
147        ch == '&'
148    }
149
150    /// Check if we're at the start of an alias
151    pub fn is_alias_start(ch: char) -> bool {
152        ch == '*'
153    }
154
155    /// Check if we're at the start of a comment
156    pub fn is_comment_start(ch: char) -> bool {
157        ch == '#'
158    }
159}