1use alloc::borrow::Cow;
2use alloc::vec::Vec;
3use alloc::string::String;
4use core::fmt::{Display, Formatter};
5use core::marker::PhantomData;
6
7use core::str::from_utf8_unchecked;
8
9use urlencoding::decode_binary;
10
11use crate::escaper::{escape_double_quotes, escape_plain, escape_single_quotes};
12use crate::tokenizer::iterator::Event::ErrorEvent;
13use crate::tokenizer::{Reader, Slicer};
14use crate::Lexer;
15
16use super::StrReader;
17
18pub struct EventIterator<'a, R, RB = &'a [u8], I = ()> {
28 pub(crate) reader: R,
30 pub(crate) buffer: RB,
31 pub(crate) state: Lexer,
33 pub(crate) tag: Option<Cow<'a, [u8]>>,
35 pub(crate) anchor: Option<Cow<'a, [u8]>>,
37 phantom: PhantomData<(&'a I, RB)>,
39}
40
41impl<'a> From<&'a str> for EventIterator<'a, StrReader<'a>, &'a [u8]> {
42 fn from(value: &'a str) -> Self {
43 EventIterator {
44 reader: StrReader::from(value),
45 state: Lexer::default(),
46 buffer: value.as_bytes(),
47 tag: None,
48 anchor: None,
49 phantom: PhantomData,
50 }
51 }
52}
53
54impl<'a> From<&'a [u8]> for EventIterator<'a, StrReader<'a>, &'a [u8]> {
55 fn from(value: &'a [u8]) -> Self {
56 EventIterator {
57 reader: StrReader::from(value),
58 state: Lexer::default(),
59 buffer: value,
60 tag: None,
61 anchor: None,
62 phantom: PhantomData,
63 }
64 }
65}
66
/// Presentation style of a scalar.
///
/// The `Display` impl of [`Event`] renders each style with its own indicator
/// character (`:`, `>`, `|`, `'`, `"`).
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum ScalarType {
    /// Rendered with the `:` indicator.
    Plain,
    /// Rendered with the `>` indicator.
    Folded,
    /// Rendered with the `|` indicator.
    Literal,
    /// Rendered with the `'` indicator.
    SingleQuote,
    /// Rendered with the `"` indicator.
    DoubleQuote,
}

/// Kind of directive carried by [`Event::Directive`].
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum DirectiveType {
    /// Rendered with a leading `%YAML ` prefix.
    Yaml,
    /// Rendered as its raw value.
    Tag,
    /// Rendered as its raw value.
    Reserved,
}

/// A single event emitted by the event iterator.
///
/// Payloads are byte strings that are borrowed from the input buffer where
/// possible and owned where they had to be assembled.
#[derive(Clone, PartialEq, Eq, Debug)]
pub enum Event<'a> {
    /// Start of a document; `explicit` adds the ` ---` suffix when rendered.
    DocStart {
        explicit: bool,
    },
    /// End of a document; `explicit` adds the ` ...` suffix when rendered.
    DocEnd {
        explicit: bool,
    },
    /// Start of a sequence; `flow` adds the ` []` marker when rendered.
    SeqStart {
        tag: Option<Cow<'a, [u8]>>,
        anchor: Option<Cow<'a, [u8]>>,
        flow: bool,
    },
    /// End of the current sequence.
    SeqEnd,
    /// Start of a mapping; `flow` adds the ` {}` marker when rendered.
    MapStart {
        tag: Option<Cow<'a, [u8]>>,
        anchor: Option<Cow<'a, [u8]>>,
        flow: bool,
    },
    /// End of the current mapping.
    MapEnd,
    /// A directive together with its raw value bytes.
    Directive {
        directive_type: DirectiveType,
        value: Cow<'a, [u8]>,
    },
    /// A scalar value with its optional tag and anchor.
    Scalar {
        tag: Option<Cow<'a, [u8]>>,
        anchor: Option<Cow<'a, [u8]>>,
        scalar_type: ScalarType,
        value: Cow<'a, [u8]>,
    },
    /// An alias; the payload is the alias name.
    Alias(Cow<'a, [u8]>),
    /// Emitted when the input could not be turned into a valid event.
    ErrorEvent,
}
116
117impl<'a> Display for Event<'a> {
118 fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result {
119 match self {
120 Event::DocStart { explicit } => {
121 let exp_str = if *explicit { " ---" } else { "" };
122 write!(f, "+DOC{exp_str}")
123 }
124 Event::DocEnd { explicit } => {
125 let exp_str = if *explicit { " ..." } else { "" };
126 write!(f, "-DOC{exp_str}")
127 }
128 Event::SeqStart { flow, tag, anchor } => {
129 write!(f, "+SEQ",)?;
130 if *flow {
131 write!(f, " []")?;
132 }
133 if let Some(cow) = anchor {
134 let string = unsafe { from_utf8_unchecked(cow.as_ref()) };
135 write!(f, " &{string}")?;
136 };
137 if let Some(cow) = tag {
138 let string = unsafe { from_utf8_unchecked(cow.as_ref()) };
139 write!(f, " <{string}>")?;
140 };
141 Ok(())
142 }
143 Event::SeqEnd => {
144 write!(f, "-SEQ")
145 }
146 Event::MapStart { flow, tag, anchor } => {
147 write!(f, "+MAP")?;
148 if *flow {
149 write!(f, " {{}}")?;
150 }
151 if let Some(cow) = anchor {
152 let string = unsafe { from_utf8_unchecked(cow.as_ref()) };
153 write!(f, " &{string}")?;
154 };
155 if let Some(cow) = tag {
156 let string = unsafe { from_utf8_unchecked(cow.as_ref()) };
157 write!(f, " <{string}>")?;
158 };
159 Ok(())
160 }
161 Event::MapEnd => {
162 write!(f, "-MAP")
163 }
164 Event::Directive {
165 directive_type,
166 value,
167 } => {
168 let val_str = unsafe { from_utf8_unchecked(value.as_ref()) };
169 match directive_type {
170 DirectiveType::Yaml => write!(f, "%YAML {val_str}"),
171 _ => write!(f, "{val_str}"),
172 }
173 }
174 Event::Scalar {
175 scalar_type,
176 value,
177 tag,
178 anchor,
179 } => {
180 let val_str = unsafe { from_utf8_unchecked(value.as_ref()) };
181 write!(f, "=VAL")?;
182
183 if let Some(cow) = anchor {
184 let string: &str = unsafe { from_utf8_unchecked(cow.as_ref()) };
185 write!(f, " &{string}")?;
186 };
187 if let Some(cow) = tag {
188 let string = unsafe { from_utf8_unchecked(cow.as_ref()) };
189 write!(f, " <{string}>")?;
190 };
191 match *scalar_type {
192 ScalarType::Plain => write!(f, " :"),
193 ScalarType::Folded => write!(f, " >"),
194 ScalarType::Literal => write!(f, " |"),
195 ScalarType::SingleQuote => write!(f, " \'"),
196 ScalarType::DoubleQuote => write!(f, " \""),
197 }?;
198 write!(f, "{val_str}")?;
199
200 Ok(())
201 }
202 ErrorEvent => {
203 write!(f, "ERR")
204 }
205 Event::Alias(value) => {
206 let val_str = unsafe { from_utf8_unchecked(value.as_ref()) };
207 write!(f, "=ALI *{val_str}")
208 }
209 }
210 }
211}
212
213impl<'a> Slicer<'a> for &'a [u8] {
214 fn slice(&self, start: usize, end: usize) -> &'a [u8] {
215 unsafe { self.get_unchecked(start..end) }
216 }
217}
218
219impl<'a, R, RB, B> Iterator for EventIterator<'a, R, RB, B>
220where
221 R: Reader<B>,
222 RB: Slicer<'a>,
223{
224 type Item = Event<'a>;
225
226 fn next(&mut self) -> Option<Self::Item> {
227 pub use crate::tokenizer::iterator::Event::*;
228 pub use crate::tokenizer::LexerToken::*;
229
230 loop {
231 if self.state.is_empty() && !self.state.stream_end {
232 self.state.fetch_next_token(&mut self.reader);
233 }
234
235 if let Some(x) = self.state.pop_token() {
236 let token = x.into();
237 match token {
238 SequenceStart => {
239 return Some(SeqStart {
240 flow: true,
241 tag: self.tag.take(),
242 anchor: self.anchor.take(),
243 });
244 }
245 SequenceStartImplicit => {
246 return Some(SeqStart {
247 flow: false,
248 tag: self.tag.take(),
249 anchor: self.anchor.take(),
250 });
251 }
252 MappingStart => {
253 return Some(MapStart {
254 flow: true,
255 tag: self.tag.take(),
256 anchor: self.anchor.take(),
257 });
258 }
259 MappingStartImplicit => {
260 return Some(MapStart {
261 flow: false,
262 tag: self.tag.take(),
263 anchor: self.anchor.take(),
264 });
265 }
266 DocumentStart => {
267 return Some(DocStart { explicit: false });
268 }
269 DocumentStartExplicit => {
270 return Some(DocStart { explicit: true });
271 }
272 SequenceEnd => {
273 return Some(SeqEnd);
274 }
275 MappingEnd => {
276 return Some(MapEnd);
277 }
278 DocumentEnd => {
279 return Some(DocEnd { explicit: false });
280 }
281 DocumentEndExplicit => {
282 return Some(DocEnd { explicit: true });
283 }
284 ErrorToken => return Some(ErrorEvent),
285 DirectiveReserved | DirectiveTag | DirectiveYaml => {
286 let directive_type = unsafe { token.to_yaml_directive() };
287 return if let (Some(start), Some(end)) =
288 (self.state.pop_token(), self.state.pop_token())
289 {
290 let slice = Cow::Borrowed(self.buffer.slice(start, end));
291 Some(Directive {
292 directive_type,
293 value: slice,
294 })
295 } else {
296 panic!("Error in processing YAML file");
297 };
298 }
299 ScalarPlain | ScalarLit | ScalarFold | ScalarDoubleQuote
300 | ScalarSingleQuote | Mark => {
301 let scalar_type = unsafe { token.to_scalar() };
303 let mut cow: Cow<'a, [u8]> = Cow::default();
304 loop {
305 match (self.state.peek_token(), self.state.peek_token_next()) {
306 (Some(start), Some(end))
307 if start < NewLine as usize && end < NewLine as usize =>
308 {
309 if cow.is_empty() {
310 cow = Cow::Borrowed(self.buffer.slice(start, end));
311 } else {
312 cow.to_mut().extend(self.buffer.slice(start, end));
313 }
314 self.state.pop_token();
315 self.state.pop_token();
316 }
317 (Some(newline), Some(line)) if newline == NewLine as usize => {
318 if line == 0 {
319 cow.to_mut().extend(" ".as_bytes());
320 } else {
321 cow.to_mut().extend("\n".repeat(line).as_bytes());
322 }
323 self.state.pop_token();
324 self.state.pop_token();
325 }
326 (_, _) => {
327 break;
328 }
329 }
330 }
331 let cow = match scalar_type {
332 ScalarType::Plain | ScalarType::Literal | ScalarType::Folded => {
333 escape_plain(cow)
334 }
335 ScalarType::DoubleQuote => escape_double_quotes(cow),
336 ScalarType::SingleQuote => escape_single_quotes(cow),
337 };
338 return Some(Scalar {
339 scalar_type,
340 value: cow,
341 tag: self.tag.take(),
342 anchor: self.anchor.take(),
343 });
344 }
345 AliasToken => {
346 if let (Some(start), Some(end)) =
347 (self.state.pop_token(), self.state.pop_token())
348 {
349 return Some(Alias(Cow::Borrowed(self.buffer.slice(start, end))));
350 }
351 }
352 AnchorToken => {
353 if let (Some(start), Some(end)) =
354 (self.state.pop_token(), self.state.pop_token())
355 {
356 self.anchor = Some(Cow::Borrowed(self.buffer.slice(start, end)));
357 }
358 }
359 TagStart => {
360 if let (Some(start), Some(mid), Some(end)) = (
361 self.state.pop_token(),
362 self.state.pop_token(),
363 self.state.pop_token(),
364 ) {
365 let namespace = self.buffer.slice(start, mid);
366 let extension = if end == 0 {
367 b""
368 } else {
369 self.buffer.slice(mid, end)
370 };
371 self.tag = if let Some(&(e1, e2)) = self.state.tags.get(namespace) {
372 let mut tag = Vec::from(self.buffer.slice(e1, e2));
373 tag.extend_from_slice(extension);
374 if tag.contains(&b'%') {
375 tag = decode_binary(&tag).into_owned();
376 }
377 Some(Cow::Owned(tag))
378 } else if namespace == b"!!" && !extension.is_empty() {
379 let mut cow: Cow<'_, [u8]> =
380 Cow::Owned(b"tag:yaml.org,2002:".to_vec());
381 cow.to_mut().extend(extension);
382 Some(cow)
383 } else if namespace == b"!" {
384 let mut cow: Cow<'_, [u8]> = Cow::Owned(b"!".to_vec());
385 cow.to_mut().extend(extension);
386 Some(cow)
387 } else if extension.is_empty() && end == 0 {
388 Some(Cow::Borrowed(namespace))
389 } else {
390 return Some(Event::ErrorEvent);
391 }
392 }
393 }
394 NewLine | ScalarEnd => {}
395 }
396 }
397 if self.state.stream_end && self.state.is_empty() {
398 return None;
399 }
400 }
401 }
402}
403
404pub fn assert_eq_event(input: &str, events: &str) {
405 use core::fmt::Write;
406
407 let mut line = String::with_capacity(events.as_bytes().len());
408 let scan: EventIterator<'_, StrReader, _> = EventIterator::from(input);
409 scan.for_each(|ev| {
410 line.push('\n');
411 write!(line, "{ev:}").unwrap();
412 });
413
414 assert_eq!(line, events, "Error in {input}");
415}