erl_tokenize/tokens.rs
1//! Tokens.
2use num_bigint::BigUint;
3use std::borrow::Cow;
4use std::fmt;
5use std::str;
6
7use crate::util;
8use crate::values::{Keyword, Symbol, Whitespace};
9use crate::{Error, Position, PositionRange, Result};
10
11/// Atom token.
12///
13/// # Examples
14///
15/// ```
16/// use erl_tokenize::Position;
17/// use erl_tokenize::tokens::AtomToken;
18///
19/// let pos = Position::new();
20///
21/// // Ok
22/// assert_eq!(AtomToken::from_text("foo", pos.clone()).unwrap().value(), "foo");
23/// assert_eq!(AtomToken::from_text("foo ", pos.clone()).unwrap().value(), "foo");
24/// assert_eq!(AtomToken::from_text("'foo'", pos.clone()).unwrap().value(), "foo");
25/// assert_eq!(AtomToken::from_text(r"'f\x6Fo'", pos.clone()).unwrap().value(), "foo");
26///
27/// // Err
28/// assert!(AtomToken::from_text(" foo", pos.clone()).is_err());
29/// assert!(AtomToken::from_text("123", pos.clone()).is_err());
30/// ```
31#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
32pub struct AtomToken {
33 value: Option<String>,
34 text: String,
35 pos: Position,
36}
37impl AtomToken {
38 /// Makes a new `AtomToken` instance from the value.
39 ///
40 /// # Examples
41 ///
42 /// ```
43 /// use erl_tokenize::Position;
44 /// use erl_tokenize::tokens::AtomToken;
45 ///
46 /// let pos = Position::new();
47 /// assert_eq!(AtomToken::from_value("foo", pos.clone()).text(), "'foo'");
48 /// assert_eq!(AtomToken::from_value("foo's", pos.clone()).text(), r"'foo\'s'");
49 /// ```
50 pub fn from_value(value: &str, pos: Position) -> Self {
51 let mut text = "'".to_string();
52 for c in value.chars() {
53 match c {
54 '\'' => text.push_str("\\'"),
55 '\\' => text.push_str("\\\\"),
56 _ => text.push(c),
57 }
58 }
59 text.push('\'');
60 AtomToken {
61 value: Some(value.to_string()),
62 text,
63 pos,
64 }
65 }
66
67 /// Tries to convert from any prefixes of the input text to an `AtomToken`.
68 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
69 let head_len = text
70 .chars()
71 .next()
72 .ok_or_else(|| Error::invalid_atom_token(pos.clone()))?
73 .len_utf8();
74 let (head, tail) = text.split_at(head_len);
75 let (value, text) = if head == "'" {
76 let (value, end) = util::parse_quotation(pos.clone(), tail, '\'')?;
77 let value = Some(value.to_string());
78 (value, unsafe { text.get_unchecked(0..=1 + end) })
79 } else {
80 let head = head.chars().next().expect("unreachable");
81 if !util::is_atom_head_char(head) {
82 return Err(Error::invalid_atom_token(pos));
83 }
84 let end = head.len_utf8()
85 + tail
86 .find(|c| !util::is_atom_non_head_char(c))
87 .unwrap_or(tail.len());
88 let text_slice = unsafe { text.get_unchecked(0..end) };
89 (None, text_slice)
90 };
91 let text = text.to_owned();
92 Ok(AtomToken { value, text, pos })
93 }
94
95 /// Returns the value of this token.
96 ///
97 /// # Examples
98 ///
99 /// ```
100 /// use erl_tokenize::Position;
101 /// use erl_tokenize::tokens::AtomToken;
102 ///
103 /// let pos = Position::new();
104 ///
105 /// assert_eq!(AtomToken::from_text("foo", pos.clone()).unwrap().value(), "foo");
106 /// assert_eq!(AtomToken::from_text("'foo'", pos.clone()).unwrap().value(), "foo");
107 /// assert_eq!(AtomToken::from_text(r"'f\x6Fo'", pos.clone()).unwrap().value(), "foo");
108 /// ```
109 pub fn value(&self) -> &str {
110 self.value.as_ref().unwrap_or(&self.text)
111 }
112
113 /// Returns the original textual representation of this token.
114 ///
115 /// # Examples
116 ///
117 /// ```
118 /// use erl_tokenize::Position;
119 /// use erl_tokenize::tokens::AtomToken;
120 ///
121 /// let pos = Position::new();
122 ///
123 /// assert_eq!(AtomToken::from_text("foo", pos.clone()).unwrap().text(), "foo");
124 /// assert_eq!(AtomToken::from_text("'foo'", pos.clone()).unwrap().text(), "'foo'");
125 /// assert_eq!(AtomToken::from_text(r"'f\x6Fo'", pos.clone()).unwrap().text(), r"'f\x6Fo'");
126 /// ```
127 pub fn text(&self) -> &str {
128 &self.text
129 }
130}
131impl PositionRange for AtomToken {
132 fn start_position(&self) -> Position {
133 self.pos.clone()
134 }
135 fn end_position(&self) -> Position {
136 if self.value.is_none() {
137 self.pos.clone().step_by_width(self.text.len())
138 } else {
139 self.pos.clone().step_by_text(&self.text)
140 }
141 }
142}
143impl fmt::Display for AtomToken {
144 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
145 self.text().fmt(f)
146 }
147}
148
149/// Character token.
150///
151/// # Examples
152///
153/// ```
154/// use erl_tokenize::Position;
155/// use erl_tokenize::tokens::CharToken;
156///
157/// let pos = Position::new();
158///
159/// // Ok
160/// assert_eq!(CharToken::from_text("$a", pos.clone()).unwrap().value(), 'a');
161/// assert_eq!(CharToken::from_text("$a ", pos.clone()).unwrap().value(), 'a');
162/// assert_eq!(CharToken::from_text(r"$\t", pos.clone()).unwrap().value(), '\t');
163/// assert_eq!(CharToken::from_text(r"$\123", pos.clone()).unwrap().value(), 'I');
164/// assert_eq!(CharToken::from_text(r"$\x6F", pos.clone()).unwrap().value(), 'o');
165/// assert_eq!(CharToken::from_text(r"$\x{06F}", pos.clone()).unwrap().value(), 'o');
166/// assert_eq!(CharToken::from_text(r"$\^a", pos.clone()).unwrap().value(), '\u{1}');
167///
168/// // Err
169/// assert!(CharToken::from_text(" $a", pos.clone()).is_err());
170/// assert!(CharToken::from_text(r"$\", pos.clone()).is_err());
171/// assert!(CharToken::from_text("a", pos.clone()).is_err());
172/// ```
173#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
174pub struct CharToken {
175 value: char,
176 text: String,
177 pos: Position,
178}
179impl CharToken {
180 /// Makes a new `CharToken` instance from the value.
181 ///
182 /// # Examples
183 ///
184 /// ```
185 /// use erl_tokenize::Position;
186 /// use erl_tokenize::tokens::CharToken;
187 ///
188 /// let pos = Position::new();
189 /// assert_eq!(CharToken::from_value('a', pos.clone()).text(), "$a");
190 /// ```
191 pub fn from_value(value: char, pos: Position) -> Self {
192 let text = if value == '\\' {
193 r"$\\".to_string()
194 } else {
195 format!("${value}")
196 };
197 CharToken { value, text, pos }
198 }
199
200 /// Tries to convert from any prefixes of the text to a `CharToken`.
201 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
202 let mut chars = text.char_indices();
203 if chars.next().map(|(_, c)| c) != Some('$') {
204 return Err(Error::invalid_char_token(pos));
205 }
206
207 let (_, c) = chars
208 .next()
209 .ok_or_else(|| Error::invalid_char_token(pos.clone()))?;
210 let (value, end) = if c == '\\' {
211 let mut chars = chars.peekable();
212 let value = util::parse_escaped_char(pos.clone(), &mut chars)?;
213 let end = chars.next().map(|(i, _)| i).unwrap_or_else(|| text.len());
214 (value, end)
215 } else {
216 let value = c;
217 let end = chars.next().map(|(i, _)| i).unwrap_or_else(|| text.len());
218 (value, end)
219 };
220 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
221 Ok(CharToken { value, text, pos })
222 }
223
224 /// Returns the value of this token.
225 ///
226 /// # Example
227 ///
228 /// ```
229 /// use erl_tokenize::Position;
230 /// use erl_tokenize::tokens::CharToken;
231 ///
232 /// let pos = Position::new();
233 ///
234 /// assert_eq!(CharToken::from_text("$a", pos.clone()).unwrap().value(), 'a');
235 /// assert_eq!(CharToken::from_text(r"$\123", pos.clone()).unwrap().value(), 'I');
236 /// ```
237 pub fn value(&self) -> char {
238 self.value
239 }
240
241 /// Returns the original textual representation of this token.
242 ///
243 /// # Example
244 ///
245 /// ```
246 /// use erl_tokenize::Position;
247 /// use erl_tokenize::tokens::CharToken;
248 ///
249 /// let pos = Position::new();
250 ///
251 /// assert_eq!(CharToken::from_text("$a", pos.clone()).unwrap().text(), "$a");
252 /// assert_eq!(CharToken::from_text(r"$\123", pos.clone()).unwrap().text(), r#"$\123"#);
253 /// ```
254 pub fn text(&self) -> &str {
255 &self.text
256 }
257}
258impl PositionRange for CharToken {
259 fn start_position(&self) -> Position {
260 self.pos.clone()
261 }
262 fn end_position(&self) -> Position {
263 self.pos.clone().step_by_text(&self.text)
264 }
265}
266impl fmt::Display for CharToken {
267 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
268 self.text().fmt(f)
269 }
270}
271
272/// Comment token.
273///
274/// # Examples
275///
276/// ```
277/// use erl_tokenize::Position;
278/// use erl_tokenize::tokens::CommentToken;
279///
280/// let pos = Position::new();
281///
282/// // Ok
283/// assert_eq!(CommentToken::from_text("%", pos.clone()).unwrap().value(), "");
284/// assert_eq!(CommentToken::from_text("%% foo ", pos.clone()).unwrap().value(), "% foo ");
285///
286/// // Err
287/// assert!(CommentToken::from_text(" % foo", pos.clone()).is_err());
288/// ```
289#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
290pub struct CommentToken {
291 text: String,
292 pos: Position,
293}
294impl CommentToken {
295 /// Makes a new `CommentToken` instance from the value.
296 ///
297 /// # Examples
298 ///
299 /// ```
300 /// use erl_tokenize::Position;
301 /// use erl_tokenize::tokens::CommentToken;
302 ///
303 /// let pos = Position::new();
304 /// assert_eq!(CommentToken::from_value("foo", pos.clone()).unwrap().text(), "%foo");
305 /// ```
306 pub fn from_value(value: &str, pos: Position) -> Result<Self> {
307 if value.find('\n').is_some() {
308 return Err(Error::invalid_comment_token(pos));
309 }
310
311 let text = format!("%{value}");
312 Ok(CommentToken { text, pos })
313 }
314
315 /// Tries to convert from any prefixes of the text to a `CommentToken`.
316 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
317 if !text.starts_with('%') {
318 return Err(Error::invalid_comment_token(pos));
319 }
320
321 let end = text.find('\n').unwrap_or(text.len());
322 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
323 Ok(CommentToken { text, pos })
324 }
325
326 /// Returns the value of this token.
327 ///
328 /// # Examples
329 ///
330 /// ```
331 /// use erl_tokenize::Position;
332 /// use erl_tokenize::tokens::CommentToken;
333 ///
334 /// let pos = Position::new();
335 ///
336 /// assert_eq!(CommentToken::from_text("%", pos.clone()).unwrap().value(), "");
337 /// assert_eq!(CommentToken::from_text("%% foo ", pos.clone()).unwrap().value(), "% foo ");
338 /// ```
339 pub fn value(&self) -> &str {
340 unsafe { self.text().get_unchecked(1..self.text.len()) }
341 }
342
343 /// Returns the original textual representation of this token.
344 ///
345 /// # Examples
346 ///
347 /// ```
348 /// use erl_tokenize::Position;
349 /// use erl_tokenize::tokens::CommentToken;
350 ///
351 /// let pos = Position::new();
352 ///
353 /// assert_eq!(CommentToken::from_text("%", pos.clone()).unwrap().text(), "%");
354 /// assert_eq!(CommentToken::from_text("%% foo ", pos.clone()).unwrap().text(), "%% foo ");
355 /// ```
356 pub fn text(&self) -> &str {
357 &self.text
358 }
359}
360impl PositionRange for CommentToken {
361 fn start_position(&self) -> Position {
362 self.pos.clone()
363 }
364 fn end_position(&self) -> Position {
365 self.pos.clone().step_by_width(self.text.len())
366 }
367}
368impl fmt::Display for CommentToken {
369 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
370 self.text().fmt(f)
371 }
372}
373
374/// Floating point number token.
375///
376/// # Examples
377///
378/// ```
379/// use erl_tokenize::Position;
380/// use erl_tokenize::tokens::FloatToken;
381///
382/// let pos = Position::new();
383///
384/// // Ok
385/// assert_eq!(FloatToken::from_text("0.1", pos.clone()).unwrap().value(), 0.1);
386/// assert_eq!(FloatToken::from_text("12.3e-1 ", pos.clone()).unwrap().value(), 1.23);
387/// assert_eq!(FloatToken::from_text("1_2.3_4e-1_0", pos.clone()).unwrap().value(), 0.000000001234);
388/// assert_eq!(FloatToken::from_text("2#0.111", pos.clone()).unwrap().value(), 0.875);
389/// assert_eq!(FloatToken::from_text("2#0.10101#e8", pos.clone()).unwrap().value(), 168.0);
390/// assert_eq!(FloatToken::from_text("16#f_f.F_F", pos.clone()).unwrap().value(), 255.99609375);
391/// assert_eq!(FloatToken::from_text("1_6#fefe.fefe#e1_6", pos.clone()).unwrap().value(), 1.2041849337671418e24);
392/// assert_eq!(FloatToken::from_text("32#vrv.vrv#e15", pos.clone()).unwrap().value(), 1.2331041872800477e27);
393///
394/// // Err
395/// assert!(FloatToken::from_text("123", pos.clone()).is_err());
396/// assert!(FloatToken::from_text(".123", pos.clone()).is_err());
397/// assert!(FloatToken::from_text("10#.123", pos.clone()).is_err());
398/// assert!(FloatToken::from_text("1.", pos.clone()).is_err());
399/// assert!(FloatToken::from_text("10#1.", pos.clone()).is_err());
400/// assert!(FloatToken::from_text("12_.3", pos.clone()).is_err());
401/// assert!(FloatToken::from_text("10#12_.3", pos.clone()).is_err());
402/// assert!(FloatToken::from_text("12._3", pos.clone()).is_err());
403/// assert!(FloatToken::from_text("10#12._3", pos.clone()).is_err());
404/// assert!(FloatToken::from_text("12.3_", pos.clone()).is_err());
405/// assert!(FloatToken::from_text("10#12.3_", pos.clone()).is_err());
406/// assert!(FloatToken::from_text("1__2.3", pos.clone()).is_err());
407/// assert!(FloatToken::from_text("10#1__2.3", pos.clone()).is_err());
408/// assert!(FloatToken::from_text("12.3__4", pos.clone()).is_err());
409/// assert!(FloatToken::from_text("10#12.3__4", pos.clone()).is_err());
410/// assert!(FloatToken::from_text("10_#12.34", pos.clone()).is_err());
411/// assert!(FloatToken::from_text("12.34e-1__0", pos.clone()).is_err());
412/// ```
413#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
414pub struct FloatToken {
415 value: f64,
416 text: String,
417 pos: Position,
418}
419impl FloatToken {
420 /// Makes a new `FloatToken` instance from the value.
421 ///
422 /// # Examples
423 ///
424 /// ```
425 /// use erl_tokenize::Position;
426 /// use erl_tokenize::tokens::FloatToken;
427 ///
428 /// let pos = Position::new();
429 /// assert_eq!(FloatToken::from_value(1.23, pos.clone()).text(), "1.23");
430 /// ```
431 pub fn from_value(value: f64, pos: Position) -> Self {
432 let text = format!("{value}");
433 FloatToken { value, text, pos }
434 }
435
436 /// Tries to convert from any prefixes of the text to a `FloatToken`.
437 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
438 if Self::is_based(text) {
439 return Self::from_text_radix(text, pos);
440 }
441
442 fn read_digits(
443 buf: &mut String,
444 chars: &mut std::iter::Peekable<impl Iterator<Item = (usize, char)>>,
445 pos: &Position,
446 ) -> Result<()> {
447 let mut needs_digit = true;
448 while let Some((_, c @ ('0'..='9' | '_'))) = chars.peek().cloned() {
449 if c == '_' {
450 if needs_digit {
451 break;
452 }
453 needs_digit = true;
454 } else {
455 buf.push(c);
456 needs_digit = false;
457 }
458 let _ = chars.next();
459 }
460 if needs_digit {
461 Err(Error::invalid_float_token(pos.clone()))
462 } else {
463 Ok(())
464 }
465 }
466
467 let mut chars = text.char_indices().peekable();
468 let mut buf = String::new();
469 read_digits(&mut buf, &mut chars, &pos)?;
470 if chars.next().map(|(_, c)| c) != Some('.') {
471 return Err(Error::invalid_float_token(pos));
472 }
473 buf.push('.');
474
475 read_digits(&mut buf, &mut chars, &pos)?;
476
477 if let Some((_, c @ ('e' | 'E'))) = chars.peek().cloned() {
478 let _ = chars.next();
479 buf.push(c);
480 if let Some((_, c @ ('+' | '-'))) = chars.peek().cloned() {
481 let _ = chars.next();
482 buf.push(c);
483 }
484 read_digits(&mut buf, &mut chars, &pos)?;
485 }
486
487 let end = chars.next().map(|(i, _)| i).unwrap_or_else(|| text.len());
488 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
489 let value = buf
490 .parse()
491 .map_err(|_| Error::invalid_float_token(pos.clone()))?;
492 Ok(FloatToken { value, text, pos })
493 }
494
495 fn is_based(text: &str) -> bool {
496 for (i, c) in text.char_indices() {
497 if matches!(c, '0'..='9' | '_') {
498 continue;
499 }
500 if i > 0 && c == '#' {
501 return true;
502 }
503 break;
504 }
505 false
506 }
507
508 fn parse_digits<T: std::str::FromStr>(text: &str, pos: &Position) -> Result<T> {
509 let mut s = String::new();
510 let mut is_prev_digit = false;
511 for (i, c) in text.char_indices() {
512 if i == 0 && c == '-' {
513 s.push(c);
514 is_prev_digit = false;
515 } else if c.is_ascii_digit() {
516 s.push(c);
517 is_prev_digit = true;
518 } else if is_prev_digit && c == '_' {
519 is_prev_digit = false;
520 } else {
521 return Err(Error::invalid_float_token(pos.clone()));
522 }
523 }
524 if !is_prev_digit {
525 return Err(Error::invalid_float_token(pos.clone()));
526 }
527 s.parse::<T>()
528 .map_err(|_| Error::invalid_float_token(pos.clone()))
529 }
530
531 fn from_text_radix(text: &str, pos: Position) -> Result<Self> {
532 let s = text;
533 let i = s.find('#').expect("infallible");
534 let radix = Self::parse_digits(&s[..i], &pos)?;
535 if !(1 < radix && radix < 37) {
536 return Err(Error::invalid_float_token(pos));
537 }
538
539 let mut s = &s[i + 1..];
540 if s.is_empty() {
541 return Err(Error::invalid_float_token(pos));
542 }
543
544 let mut value = 0.0;
545 let mut is_prev_digit = false;
546 while let Some(c) = s.chars().next() {
547 s = &s[c.len_utf8()..];
548
549 if is_prev_digit && c == '_' {
550 is_prev_digit = false;
551 continue;
552 }
553 if is_prev_digit && c == '.' {
554 is_prev_digit = true;
555 break;
556 }
557 is_prev_digit = true;
558
559 let n = c
560 .to_digit(radix)
561 .ok_or_else(|| Error::invalid_float_token(pos.clone()))?;
562 value = value * radix as f64 + n as f64;
563 }
564 if !is_prev_digit || s.is_empty() {
565 return Err(Error::invalid_float_token(pos));
566 }
567
568 let mut is_prev_digit = false;
569 let mut j = 1;
570 let mut has_exp = false;
571 while let Some(c) = s.chars().next() {
572 if is_prev_digit && c == '_' {
573 s = &s[c.len_utf8()..];
574 is_prev_digit = false;
575 continue;
576 }
577 if is_prev_digit && c == '#' {
578 s = &s[c.len_utf8()..];
579 is_prev_digit = true;
580 has_exp = true;
581 break;
582 }
583
584 if let Some(n) = c.to_digit(radix) {
585 s = &s[c.len_utf8()..];
586 is_prev_digit = true;
587 value += n as f64 / (radix as f64).powi(j);
588 j += 1;
589 } else {
590 break;
591 }
592 }
593 if !is_prev_digit {
594 return Err(Error::invalid_float_token(pos));
595 }
596
597 if has_exp {
598 if !s.starts_with('e') {
599 return Err(Error::invalid_float_token(pos));
600 }
601 s = &s[1..];
602 let i = s
603 .char_indices()
604 .position(|(i, c)| !((i == 0 && c == '-') || matches!(c, '0'..='9' | '_')))
605 .unwrap_or(s.len());
606 let exp: i32 = Self::parse_digits(&s[..i], &pos)?;
607 value *= (radix as f64).powi(exp);
608 s = &s[i..];
609 }
610
611 let end = text.len() - s.len();
612 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
613 Ok(FloatToken { value, text, pos })
614 }
615
616 /// Returns the value of this token.
617 ///
618 /// # Examples
619 ///
620 /// ```
621 /// use erl_tokenize::Position;
622 /// use erl_tokenize::tokens::FloatToken;
623 ///
624 /// let pos = Position::new();
625 ///
626 /// assert_eq!(FloatToken::from_text("0.1", pos.clone()).unwrap().value(), 0.1);
627 /// assert_eq!(FloatToken::from_text("12.3e-1", pos.clone()).unwrap().value(), 1.23);
628 /// ```
629 pub fn value(&self) -> f64 {
630 self.value
631 }
632
633 /// Returns the original textual representation of this token.
634 ///
635 /// # Examples
636 ///
637 /// ```
638 /// use erl_tokenize::Position;
639 /// use erl_tokenize::tokens::FloatToken;
640 ///
641 /// let pos = Position::new();
642 ///
643 /// assert_eq!(FloatToken::from_text("0.1", pos.clone()).unwrap().text(), "0.1");
644 /// assert_eq!(FloatToken::from_text("12.3e-1", pos.clone()).unwrap().text(), "12.3e-1");
645 /// ```
646 pub fn text(&self) -> &str {
647 &self.text
648 }
649}
650impl PositionRange for FloatToken {
651 fn start_position(&self) -> Position {
652 self.pos.clone()
653 }
654 fn end_position(&self) -> Position {
655 self.pos.clone().step_by_width(self.text.len())
656 }
657}
658impl fmt::Display for FloatToken {
659 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
660 self.text().fmt(f)
661 }
662}
663
664/// Integer token.
665///
666/// # Examples
667///
668/// ```
669/// # extern crate erl_tokenize;
670/// use erl_tokenize::Position;
671/// use erl_tokenize::tokens::IntegerToken;
672///
673/// # fn main() {
674/// let pos = Position::new();
675///
676/// // Ok
677/// assert_eq!(IntegerToken::from_text("10", pos.clone()).unwrap().value().try_into(),
678/// Ok(10u32));
679/// assert_eq!(IntegerToken::from_text("123_456", pos.clone()).unwrap().value().try_into(),
680/// Ok(123456));
681/// assert_eq!(IntegerToken::from_text("16#ab0e", pos.clone()).unwrap().value().try_into(),
682/// Ok(0xab0e));
683/// assert_eq!(IntegerToken::from_text("1_6#a_b_0e", pos.clone()).unwrap().value().try_into(),
684/// Ok(0xab0e));
685///
686/// // Err
687/// assert!(IntegerToken::from_text("-10", pos.clone()).is_err());
688/// assert!(IntegerToken::from_text("123_456_", pos.clone()).is_err());
689/// assert!(IntegerToken::from_text("123__456", pos.clone()).is_err());
690/// # }
691/// ```
692#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
693pub struct IntegerToken {
694 value: BigUint,
695 text: String,
696 pos: Position,
697}
698impl IntegerToken {
699 /// Makes a new `IntegerToken` instance from the value.
700 ///
701 /// # Examples
702 ///
703 /// ```
704 /// use erl_tokenize::Position;
705 /// use erl_tokenize::tokens::IntegerToken;
706 ///
707 /// let pos = Position::new();
708 /// assert_eq!(IntegerToken::from_value(123u32.into(), pos.clone()).text(), "123");
709 /// ```
710 pub fn from_value(value: BigUint, pos: Position) -> Self {
711 let text = format!("{value}");
712 IntegerToken { value, text, pos }
713 }
714
715 /// Tries to convert from any prefixes of the text to an `IntegerToken`.
716 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
717 let mut has_radix = false;
718 let mut radix = 10;
719 let mut chars = text.char_indices().peekable();
720 let mut digits = String::new();
721 let mut needs_digit = true;
722 while let Some((_, c)) = chars.peek().cloned() {
723 if c == '#' && !has_radix && !needs_digit {
724 radix = digits
725 .parse()
726 .map_err(|_| Error::invalid_integer_token(pos.clone()))?;
727 if !(1 < radix && radix < 37) {
728 return Err(Error::invalid_integer_token(pos));
729 }
730 digits.clear();
731 needs_digit = true;
732 has_radix = true;
733 } else if c.is_digit(radix) {
734 digits.push(c);
735 needs_digit = false;
736 } else if c == '_' && !needs_digit {
737 needs_digit = true;
738 } else {
739 break;
740 }
741 chars.next();
742 }
743 if needs_digit {
744 return Err(Error::invalid_integer_token(pos));
745 }
746
747 let end = chars.peek().map(|&(i, _)| i).unwrap_or_else(|| text.len());
748 let value = BigUint::parse_bytes(digits.as_bytes(), radix)
749 .ok_or_else(|| Error::invalid_integer_token(pos.clone()))?;
750 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
751 Ok(IntegerToken { value, text, pos })
752 }
753
754 /// Returns the value of this token.
755 ///
756 /// # Examples
757 ///
758 /// ```
759 /// # extern crate erl_tokenize;
760 /// use erl_tokenize::Position;
761 /// use erl_tokenize::tokens::IntegerToken;
762 ///
763 /// # fn main() {
764 /// let pos = Position::new();
765 ///
766 /// assert_eq!(IntegerToken::from_text("10", pos.clone()).unwrap().value().try_into(),
767 /// Ok(10u32));
768 /// assert_eq!(IntegerToken::from_text("16#ab0e", pos.clone()).unwrap().value().try_into(),
769 /// Ok(0xab0e));
770 /// # }
771 /// ```
772 pub fn value(&self) -> &BigUint {
773 &self.value
774 }
775
776 /// Returns the original textual representation of this token.
777 ///
778 /// # Examples
779 ///
780 /// ```
781 /// use erl_tokenize::Position;
782 /// use erl_tokenize::tokens::IntegerToken;
783 ///
784 /// let pos = Position::new();
785 ///
786 /// assert_eq!(IntegerToken::from_text("10", pos.clone()).unwrap().text(), "10");
787 /// assert_eq!(IntegerToken::from_text("16#ab0e", pos.clone()).unwrap().text(), "16#ab0e");
788 /// ```
789 pub fn text(&self) -> &str {
790 &self.text
791 }
792}
793impl PositionRange for IntegerToken {
794 fn start_position(&self) -> Position {
795 self.pos.clone()
796 }
797 fn end_position(&self) -> Position {
798 self.pos.clone().step_by_width(self.text.len())
799 }
800}
801impl fmt::Display for IntegerToken {
802 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
803 self.text().fmt(f)
804 }
805}
806
807/// Keyword token.
808///
809/// # Examples
810///
811/// ```
812/// use erl_tokenize::Position;
813/// use erl_tokenize::tokens::KeywordToken;
814/// use erl_tokenize::values::Keyword;
815///
816/// let pos = Position::new();
817///
818/// // Ok
819/// assert_eq!(KeywordToken::from_text("receive", pos.clone()).unwrap().value(), Keyword::Receive);
820/// assert_eq!(KeywordToken::from_text("and ", pos.clone()).unwrap().value(), Keyword::And);
821///
822/// // Err
823/// assert!(KeywordToken::from_text("foo", pos.clone()).is_err());
824/// assert!(KeywordToken::from_text(" and", pos.clone()).is_err());
825/// assert!(KeywordToken::from_text("andfoo", pos.clone()).is_err());
826/// ```
827#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
828pub struct KeywordToken {
829 value: Keyword,
830 pos: Position,
831}
832impl KeywordToken {
833 /// Makes a new `KeywordToken` instance from the value.
834 ///
835 /// # Examples
836 ///
837 /// ```
838 /// use erl_tokenize::Position;
839 /// use erl_tokenize::tokens::KeywordToken;
840 /// use erl_tokenize::values::Keyword;
841 ///
842 /// let pos = Position::new();
843 /// assert_eq!(KeywordToken::from_value(Keyword::Case, pos.clone()).text(), "case");
844 /// ```
845 pub fn from_value(value: Keyword, pos: Position) -> Self {
846 KeywordToken { value, pos }
847 }
848
849 /// Tries to convert from any prefixes of the text to a `KeywordToken`.
850 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
851 let atom = AtomToken::from_text(text, pos.clone())?;
852 let value = match atom.text() {
853 "after" => Keyword::After,
854 "and" => Keyword::And,
855 "andalso" => Keyword::Andalso,
856 "band" => Keyword::Band,
857 "begin" => Keyword::Begin,
858 "bnot" => Keyword::Bnot,
859 "bor" => Keyword::Bor,
860 "bsl" => Keyword::Bsl,
861 "bsr" => Keyword::Bsr,
862 "bxor" => Keyword::Bxor,
863 "case" => Keyword::Case,
864 "catch" => Keyword::Catch,
865 "cond" => Keyword::Cond,
866 "div" => Keyword::Div,
867 "end" => Keyword::End,
868 "fun" => Keyword::Fun,
869 "if" => Keyword::If,
870 "let" => Keyword::Let,
871 "not" => Keyword::Not,
872 "of" => Keyword::Of,
873 "or" => Keyword::Or,
874 "orelse" => Keyword::Orelse,
875 "receive" => Keyword::Receive,
876 "rem" => Keyword::Rem,
877 "try" => Keyword::Try,
878 "when" => Keyword::When,
879 "xor" => Keyword::Xor,
880 "maybe" => Keyword::Maybe,
881 "else" => Keyword::Else,
882 s => return Err(Error::unknown_keyword(pos, s.to_owned())),
883 };
884 Ok(KeywordToken { value, pos })
885 }
886
887 /// Returns the value of this token.
888 ///
889 /// # Examples
890 ///
891 /// ```
892 /// use erl_tokenize::Position;
893 /// use erl_tokenize::tokens::KeywordToken;
894 /// use erl_tokenize::values::Keyword;
895 ///
896 /// let pos = Position::new();
897 ///
898 /// assert_eq!(KeywordToken::from_text("receive", pos.clone()).unwrap().value(),
899 /// Keyword::Receive);
900 /// assert_eq!(KeywordToken::from_text("and ", pos.clone()).unwrap().value(),
901 /// Keyword::And);
902 /// ```
903 pub fn value(&self) -> Keyword {
904 self.value
905 }
906
907 /// Returns the original textual representation of this token.
908 ///
909 /// # Examples
910 ///
911 /// ```
912 /// use erl_tokenize::Position;
913 /// use erl_tokenize::tokens::KeywordToken;
914 ///
915 /// let pos = Position::new();
916 ///
917 /// assert_eq!(KeywordToken::from_text("receive", pos.clone()).unwrap().text(), "receive");
918 /// assert_eq!(KeywordToken::from_text("and ", pos.clone()).unwrap().text(), "and");
919 /// ```
920 pub fn text(&self) -> &'static str {
921 self.value.as_str()
922 }
923}
924impl PositionRange for KeywordToken {
925 fn start_position(&self) -> Position {
926 self.pos.clone()
927 }
928 fn end_position(&self) -> Position {
929 self.pos.clone().step_by_width(self.text().len())
930 }
931}
932impl fmt::Display for KeywordToken {
933 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
934 self.text().fmt(f)
935 }
936}
937
938/// Sigil string token.
939///
940/// # Examples
941///
942/// ```
943/// use erl_tokenize::Position;
944/// use erl_tokenize::tokens::SigilStringToken;
945///
946/// # fn main() -> erl_tokenize::Result<()> {
947/// let pos = Position::new();
948///
949/// // Ok
950/// assert_eq!(SigilStringToken::from_text(r#"~"foo""#, pos.clone())?.value(), ("", "foo", ""));
951/// assert_eq!(SigilStringToken::from_text(r#"~(foo)"#, pos.clone())?.value(), ("", "foo", ""));
952/// assert_eq!(SigilStringToken::from_text(r#"~b"foo" "#, pos.clone())?.value(), ("b", "foo", ""));
953///
954/// // Err
955/// assert!(SigilStringToken::from_text(r#""foo""#, pos.clone()).is_err());
956/// # Ok(())
957/// # }
958/// ```
959#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
960pub struct SigilStringToken {
961 prefix: String,
962 content: String,
963 suffix: String,
964 text: String,
965 pos: Position,
966}
967
968impl SigilStringToken {
969 /// Returns the value (i.e., prefix, content, suffix) of this token.
970 ///
971 /// # Examples
972 ///
973 /// ```
974 /// use erl_tokenize::Position;
975 /// use erl_tokenize::tokens::SigilStringToken;
976 ///
977 /// # fn main() -> erl_tokenize::Result<()> {
978 /// let pos = Position::new();
979 ///
980 /// assert_eq!(SigilStringToken::from_text(r#"~"foo""#, pos.clone())?.value(), ("", "foo", ""));
981 /// assert_eq!(SigilStringToken::from_text(r#"~(foo)"#, pos.clone())?.value(), ("", "foo", ""));
982 /// assert_eq!(SigilStringToken::from_text(r#"~b"foo" "#, pos.clone())?.value(), ("b", "foo", ""));
983 /// # Ok(())
984 /// # }
985 /// ```
986 pub fn value(&self) -> (&str, &str, &str) {
987 (&self.prefix, &self.content, &self.suffix)
988 }
989
990 /// Returns the original textual representation of this token.
991 ///
992 /// # Examples
993 ///
994 /// ```
995 /// use erl_tokenize::Position;
996 /// use erl_tokenize::tokens::SigilStringToken;
997 ///
998 /// # fn main() -> erl_tokenize::Result<()> {
999 /// let pos = Position::new();
1000 ///
1001 /// assert_eq!(SigilStringToken::from_text(r#"~"foo""#, pos.clone())?.text(), r#"~"foo""#);
1002 /// assert_eq!(SigilStringToken::from_text(r#"~(foo)"#, pos.clone())?.text(), r#"~(foo)"#);
1003 /// assert_eq!(SigilStringToken::from_text(r#"~b"foo" "#, pos.clone())?.text(), r#"~b"foo""#);
1004 /// # Ok(())
1005 /// # }
1006 /// ```
1007 pub fn text(&self) -> &str {
1008 &self.text
1009 }
1010
1011 /// Tries to convert from any prefixes of the text to a [`SigilStringToken`].
1012 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
1013 if !text.starts_with('~') {
1014 return Err(Error::invalid_sigil_string_token(pos));
1015 }
1016
1017 let offset = 1;
1018 let prefix: String = text[offset..]
1019 .chars()
1020 .take_while(|c| util::is_atom_non_head_char(*c))
1021 .collect();
1022
1023 let offset = offset + prefix.len();
1024 let Some(open_delimiter) = text[offset..].chars().next() else {
1025 return Err(Error::invalid_sigil_string_token(pos));
1026 };
1027 let (content, offset) = if open_delimiter == '"' {
1028 let t = StringToken::from_text(&text[offset..], pos.clone().step_by_width(offset))?;
1029 let content = t.value().to_owned();
1030 (content, offset + t.text().len())
1031 } else {
1032 let close_delimiter = match open_delimiter {
1033 '(' => ')',
1034 '[' => ']',
1035 '{' => '}',
1036 '<' => '>',
1037 '/' | '|' | '\'' | '`' | '#' => open_delimiter,
1038 _ => return Err(Error::invalid_sigil_string_token(pos)),
1039 };
1040 util::parse_quotation(pos.clone(), &text[offset + 1..], close_delimiter)
1041 .map(|(v, end)| (v.into_owned(), offset + 1 + end + 1))?
1042 };
1043
1044 let suffix: String = text[offset..]
1045 .chars()
1046 .take_while(|c| util::is_atom_non_head_char(*c))
1047 .collect();
1048 let offset = offset + suffix.len();
1049
1050 Ok(Self {
1051 prefix,
1052 content,
1053 suffix,
1054 text: text[..offset].to_owned(),
1055 pos,
1056 })
1057 }
1058}
1059
1060impl PositionRange for SigilStringToken {
1061 fn start_position(&self) -> Position {
1062 self.pos.clone()
1063 }
1064
1065 fn end_position(&self) -> Position {
1066 self.pos.clone().step_by_text(&self.text)
1067 }
1068}
1069
1070impl fmt::Display for SigilStringToken {
1071 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1072 self.text().fmt(f)
1073 }
1074}
1075
/// String token.
///
/// A token keeps the source text it was parsed from (or generated as) together with
/// the position at which it starts.
///
/// # Examples
///
/// ```
/// use erl_tokenize::Position;
/// use erl_tokenize::tokens::StringToken;
///
/// let pos = Position::new();
///
/// // Ok
/// assert_eq!(StringToken::from_text(r#""foo""#, pos.clone()).unwrap().value(), "foo");
/// assert_eq!(StringToken::from_text(r#""foo" "#, pos.clone()).unwrap().value(), "foo");
/// assert_eq!(StringToken::from_text(r#""f\x6Fo""#, pos.clone()).unwrap().value(), "foo");
///
/// // Err
/// assert!(StringToken::from_text(r#" "foo""#, pos.clone()).is_err());
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct StringToken {
    /// Value of the string. `None` when the parser was able to borrow the value
    /// directly from the input; `value()` then returns `text` without its first and
    /// last byte (the quotes).
    value: Option<String>,
    /// Source text of the token, including the delimiting quotes.
    text: String,
    /// Position at which the token starts.
    pos: Position,
}
1100impl StringToken {
1101 /// Makes a new `StringToken` instance from the value.
1102 ///
1103 /// # Examples
1104 ///
1105 /// ```
1106 /// use erl_tokenize::Position;
1107 /// use erl_tokenize::tokens::StringToken;
1108 ///
1109 /// let pos = Position::new();
1110 /// assert_eq!(StringToken::from_value("foo", pos.clone()).text(), r#""foo""#);
1111 /// ```
1112 pub fn from_value(value: &str, pos: Position) -> Self {
1113 let text = format!("{value:?}");
1114 StringToken {
1115 value: Some(value.to_string()),
1116 text,
1117 pos,
1118 }
1119 }
1120
1121 /// Tries to convert from any prefixes of the text to a `StringToken`.
1122 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
1123 if text.is_empty() {
1124 return Err(Error::invalid_string_token(pos));
1125 }
1126
1127 let (value, end) = if text.starts_with(r#"""""#) {
1128 // Triple-quoted strings: https://www.erlang.org/eeps/eep-0064
1129 Self::parse_triple_quoted(text, pos.clone())?
1130 } else {
1131 let (head, tail) = text.split_at(1);
1132 if head != "\"" {
1133 return Err(Error::invalid_string_token(pos));
1134 }
1135 util::parse_quotation(pos.clone(), tail, '"').map(|(v, end)| (v, end + 2))?
1136 };
1137 if text.get(end..end + 1) == Some("\"") {
1138 let pos = pos.step_by_text(&text[0..end]);
1139 return Err(Error::adjacent_string_literals(pos));
1140 }
1141
1142 let value = match value {
1143 Cow::Borrowed(_) => None,
1144 Cow::Owned(v) => Some(v),
1145 };
1146 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
1147 Ok(StringToken { value, text, pos })
1148 }
1149
1150 fn parse_triple_quoted(text: &str, pos: Position) -> Result<(Cow<'_, str>, usize)> {
1151 let mut quote_count = 0;
1152 let mut chars = text.chars().peekable();
1153 let mut start_line_end = 0;
1154
1155 while let Some(c) = chars.peek().copied() {
1156 if c == '"' {
1157 quote_count += 1;
1158 start_line_end += chars.next().expect("unreachable").len_utf8();
1159 } else {
1160 break;
1161 }
1162 }
1163
1164 let mut start_line_end_found = false;
1165 for c in chars {
1166 start_line_end += c.len_utf8();
1167 if c == '\n' {
1168 start_line_end_found = true;
1169 break;
1170 } else if !c.is_ascii_whitespace() {
1171 return Err(Error::invalid_string_token(pos));
1172 }
1173 }
1174 if !start_line_end_found {
1175 return Err(Error::no_closing_quotation(pos));
1176 }
1177
1178 let mut indent = 0;
1179 let mut maybe_end_line = true;
1180 let mut remaining_quote_count = quote_count;
1181 let mut end_line_start = start_line_end;
1182 let mut end_line_end = start_line_end;
1183 for c in text[start_line_end..].chars() {
1184 end_line_end += c.len_utf8();
1185 if c == '\n' {
1186 indent = 0;
1187 maybe_end_line = true;
1188 remaining_quote_count = quote_count;
1189 end_line_start = end_line_end;
1190 } else if c.is_ascii_whitespace() {
1191 indent += 1;
1192 } else if maybe_end_line && c == '"' {
1193 remaining_quote_count -= 1;
1194 if remaining_quote_count == 0 {
1195 break;
1196 }
1197 } else {
1198 maybe_end_line = false;
1199 }
1200 }
1201 if remaining_quote_count != 0 {
1202 return Err(Error::no_closing_quotation(pos));
1203 }
1204
1205 if indent == 0 {
1206 return Ok((
1207 Cow::Owned(
1208 text[start_line_end..(end_line_start - 1).max(start_line_end)].to_owned(),
1209 ),
1210 end_line_end,
1211 ));
1212 }
1213
1214 let mut value = String::new();
1215 for line in text[start_line_end..end_line_start - 1].lines() {
1216 if line == "\n" {
1217 value.push('\n');
1218 continue;
1219 }
1220
1221 let mut valid_line = false;
1222 for (i, c) in line.chars().enumerate() {
1223 if i < indent {
1224 if c.is_ascii_whitespace() {
1225 continue;
1226 } else {
1227 return Err(Error::invalid_string_token(pos));
1228 }
1229 }
1230 value.push(c);
1231 valid_line = true;
1232 }
1233 if !valid_line {
1234 return Err(Error::invalid_string_token(pos));
1235 }
1236 }
1237
1238 Ok((Cow::Owned(value), end_line_end))
1239 }
1240
1241 /// Returns the value of this token.
1242 ///
1243 /// # Examples
1244 ///
1245 /// ```
1246 /// use erl_tokenize::Position;
1247 /// use erl_tokenize::tokens::StringToken;
1248 ///
1249 /// let pos = Position::new();
1250 ///
1251 /// assert_eq!(StringToken::from_text(r#""foo""#, pos.clone()).unwrap().value(), "foo");
1252 /// assert_eq!(StringToken::from_text(r#""foo" "#, pos.clone()).unwrap().value(), "foo");
1253 /// assert_eq!(StringToken::from_text(r#""f\x6Fo""#, pos.clone()).unwrap().value(), "foo");
1254 /// ```
1255 pub fn value(&self) -> &str {
1256 if let Some(v) = self.value.as_ref() {
1257 v
1258 } else {
1259 let len = self.text.len();
1260 unsafe { self.text.get_unchecked(1..len - 1) }
1261 }
1262 }
1263
1264 /// Returns the original textual representation of this token.
1265 ///
1266 /// # Examples
1267 ///
1268 /// ```
1269 /// use erl_tokenize::Position;
1270 /// use erl_tokenize::tokens::StringToken;
1271 ///
1272 /// let pos = Position::new();
1273 ///
1274 /// assert_eq!(StringToken::from_text(r#""foo""#, pos.clone()).unwrap().text(),
1275 /// r#""foo""#);
1276 /// assert_eq!(StringToken::from_text(r#""foo" "#, pos.clone()).unwrap().text(),
1277 /// r#""foo""#);
1278 /// assert_eq!(StringToken::from_text(r#""f\x6Fo""#, pos.clone()).unwrap().text(),
1279 /// r#""f\x6Fo""#);
1280 /// ```
1281 pub fn text(&self) -> &str {
1282 &self.text
1283 }
1284}
1285impl PositionRange for StringToken {
1286 fn start_position(&self) -> Position {
1287 self.pos.clone()
1288 }
1289 fn end_position(&self) -> Position {
1290 self.pos.clone().step_by_text(&self.text)
1291 }
1292}
1293impl fmt::Display for StringToken {
1294 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1295 self.text().fmt(f)
1296 }
1297}
1298
/// Symbol token.
///
/// Only the symbol and the start position are stored; the token text is derived from
/// the symbol.
///
/// # Examples
///
/// ```
/// use erl_tokenize::Position;
/// use erl_tokenize::tokens::SymbolToken;
/// use erl_tokenize::values::Symbol;
///
/// let pos = Position::new();
///
/// // Ok
/// assert_eq!(SymbolToken::from_text(".", pos.clone()).unwrap().value(), Symbol::Dot);
/// assert_eq!(SymbolToken::from_text(":=  ", pos.clone()).unwrap().value(), Symbol::MapMatch);
///
/// // Err
/// assert!(SymbolToken::from_text("  .", pos.clone()).is_err());
/// assert!(SymbolToken::from_text("foo", pos.clone()).is_err());
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct SymbolToken {
    /// The symbol this token stands for; `text()` returns its `as_str()` form.
    value: Symbol,
    /// Position at which the token starts.
    pos: Position,
}
1323impl SymbolToken {
1324 /// Makes a new `SymbolToken` instance from the value.
1325 ///
1326 /// # Examples
1327 ///
1328 /// ```
1329 /// use erl_tokenize::Position;
1330 /// use erl_tokenize::tokens::SymbolToken;
1331 /// use erl_tokenize::values::Symbol;
1332 ///
1333 /// let pos = Position::new();
1334 /// assert_eq!(SymbolToken::from_value(Symbol::Dot, pos.clone()).text(), ".");
1335 /// ```
1336 pub fn from_value(value: Symbol, pos: Position) -> Self {
1337 SymbolToken { value, pos }
1338 }
1339
1340 /// Tries to convert from any prefixes of the text to a `SymbolToken`.
1341 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
1342 let bytes = text.as_bytes();
1343 let mut symbol = if bytes.len() >= 3 {
1344 match &bytes[0..3] {
1345 b"=:=" => Some(Symbol::ExactEq),
1346 b"=/=" => Some(Symbol::ExactNotEq),
1347 b"..." => Some(Symbol::TripleDot),
1348 b"<:-" => Some(Symbol::StrictLeftArrow),
1349 b"<:=" => Some(Symbol::StrictDoubleLeftArrow),
1350 _ => None,
1351 }
1352 } else {
1353 None
1354 };
1355 if symbol.is_none() && bytes.len() >= 2 {
1356 symbol = match &bytes[0..2] {
1357 b"::" => Some(Symbol::DoubleColon),
1358 b":=" => Some(Symbol::MapMatch),
1359 b"||" => Some(Symbol::DoubleVerticalBar),
1360 b"--" => Some(Symbol::MinusMinus),
1361 b"++" => Some(Symbol::PlusPlus),
1362 b"->" => Some(Symbol::RightArrow),
1363 b"<-" => Some(Symbol::LeftArrow),
1364 b"=>" => Some(Symbol::DoubleRightArrow),
1365 b"<=" => Some(Symbol::DoubleLeftArrow),
1366 b">>" => Some(Symbol::DoubleRightAngle),
1367 b"<<" => Some(Symbol::DoubleLeftAngle),
1368 b"==" => Some(Symbol::Eq),
1369 b"/=" => Some(Symbol::NotEq),
1370 b">=" => Some(Symbol::GreaterEq),
1371 b"=<" => Some(Symbol::LessEq),
1372 b"??" => Some(Symbol::DoubleQuestion),
1373 b"?=" => Some(Symbol::MaybeMatch),
1374 b".." => Some(Symbol::DoubleDot),
1375 b"&&" => Some(Symbol::DoubleAmpersand),
1376 _ => None,
1377 };
1378 }
1379 if symbol.is_none() && !bytes.is_empty() {
1380 symbol = match bytes[0] {
1381 b'[' => Some(Symbol::OpenSquare),
1382 b']' => Some(Symbol::CloseSquare),
1383 b'(' => Some(Symbol::OpenParen),
1384 b')' => Some(Symbol::CloseParen),
1385 b'{' => Some(Symbol::OpenBrace),
1386 b'}' => Some(Symbol::CloseBrace),
1387 b'#' => Some(Symbol::Sharp),
1388 b'/' => Some(Symbol::Slash),
1389 b'.' => Some(Symbol::Dot),
1390 b',' => Some(Symbol::Comma),
1391 b':' => Some(Symbol::Colon),
1392 b';' => Some(Symbol::Semicolon),
1393 b'=' => Some(Symbol::Match),
1394 b'|' => Some(Symbol::VerticalBar),
1395 b'?' => Some(Symbol::Question),
1396 b'!' => Some(Symbol::Not),
1397 b'-' => Some(Symbol::Hyphen),
1398 b'+' => Some(Symbol::Plus),
1399 b'*' => Some(Symbol::Multiply),
1400 b'>' => Some(Symbol::Greater),
1401 b'<' => Some(Symbol::Less),
1402 _ => None,
1403 };
1404 }
1405 if let Some(value) = symbol {
1406 Ok(SymbolToken { value, pos })
1407 } else {
1408 Err(Error::invalid_symbol_token(pos))
1409 }
1410 }
1411
1412 /// Returns the value of this token.
1413 ///
1414 /// # Examples
1415 ///
1416 /// ```
1417 /// use erl_tokenize::Position;
1418 /// use erl_tokenize::tokens::SymbolToken;
1419 /// use erl_tokenize::values::Symbol;
1420 ///
1421 /// let pos = Position::new();
1422 ///
1423 /// assert_eq!(SymbolToken::from_text(".", pos.clone()).unwrap().value(), Symbol::Dot);
1424 /// assert_eq!(SymbolToken::from_text(":= ", pos.clone()).unwrap().value(), Symbol::MapMatch);
1425 /// ```
1426 pub fn value(&self) -> Symbol {
1427 self.value
1428 }
1429
1430 /// Returns the original textual representation of this token.
1431 ///
1432 /// # Examples
1433 ///
1434 /// ```
1435 /// use erl_tokenize::Position;
1436 /// use erl_tokenize::tokens::SymbolToken;
1437 ///
1438 /// let pos = Position::new();
1439 ///
1440 /// assert_eq!(SymbolToken::from_text(".", pos.clone()).unwrap().text(), ".");
1441 /// assert_eq!(SymbolToken::from_text(":= ", pos.clone()).unwrap().text(), ":=");
1442 /// ```
1443 pub fn text(&self) -> &'static str {
1444 self.value.as_str()
1445 }
1446}
1447impl PositionRange for SymbolToken {
1448 fn start_position(&self) -> Position {
1449 self.pos.clone()
1450 }
1451 fn end_position(&self) -> Position {
1452 self.pos.clone().step_by_width(self.text().len())
1453 }
1454}
1455impl fmt::Display for SymbolToken {
1456 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1457 self.text().fmt(f)
1458 }
1459}
1460
/// Variable token.
///
/// The variable name is stored once and serves as both the value and the text of the
/// token.
///
/// # Examples
///
/// ```
/// use erl_tokenize::Position;
/// use erl_tokenize::tokens::VariableToken;
///
/// let pos = Position::new();
///
/// // Ok
/// assert_eq!(VariableToken::from_text("Foo", pos.clone()).unwrap().value(), "Foo");
/// assert_eq!(VariableToken::from_text("_  ", pos.clone()).unwrap().value(), "_");
/// assert_eq!(VariableToken::from_text("_foo@bar", pos.clone()).unwrap().value(), "_foo@bar");
///
/// // Err
/// assert!(VariableToken::from_text("foo", pos.clone()).is_err());
/// assert!(VariableToken::from_text("  Foo", pos.clone()).is_err());
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct VariableToken {
    /// The variable name; returned by both `value()` and `text()`.
    text: String,
    /// Position at which the token starts.
    pos: Position,
}
1485impl VariableToken {
1486 /// Makes a new `VariableToken` instance from the value.
1487 ///
1488 /// # Examples
1489 ///
1490 /// ```
1491 /// use erl_tokenize::Position;
1492 /// use erl_tokenize::tokens::VariableToken;
1493 ///
1494 /// let pos = Position::new();
1495 /// assert_eq!(VariableToken::from_value("Foo", pos.clone()).unwrap().text(), "Foo");
1496 /// ```
1497 pub fn from_value(value: &str, pos: Position) -> Result<Self> {
1498 let var = Self::from_text(value, pos.clone())?;
1499 if var.text().len() != value.len() {
1500 Err(Error::invalid_variable_token(pos))
1501 } else {
1502 Ok(var)
1503 }
1504 }
1505
1506 /// Tries to convert from any prefixes of the text to a `VariableToken`.
1507 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
1508 let mut chars = text.char_indices();
1509 let (_, head) = chars
1510 .next()
1511 .ok_or_else(|| Error::invalid_variable_token(pos.clone()))?;
1512 if !util::is_variable_head_char(head) {
1513 return Err(Error::invalid_variable_token(pos));
1514 }
1515 let end = chars
1516 .find(|&(_, c)| !util::is_variable_non_head_char(c))
1517 .map(|(i, _)| i)
1518 .unwrap_or_else(|| text.len());
1519 let text = unsafe { text.get_unchecked(0..end) }.to_owned();
1520 Ok(VariableToken { text, pos })
1521 }
1522
1523 /// Returns the value of this token.
1524 ///
1525 /// # Examples
1526 ///
1527 /// ```
1528 /// use erl_tokenize::Position;
1529 /// use erl_tokenize::tokens::VariableToken;
1530 ///
1531 /// let pos = Position::new();
1532 ///
1533 /// assert_eq!(VariableToken::from_text("Foo", pos.clone()).unwrap().value(), "Foo");
1534 /// assert_eq!(VariableToken::from_text("_foo ", pos.clone()).unwrap().value(), "_foo");
1535 /// ```
1536 pub fn value(&self) -> &str {
1537 &self.text
1538 }
1539
1540 /// Returns the original textual representation of this token.
1541 ///
1542 /// # Examples
1543 ///
1544 /// ```
1545 /// use erl_tokenize::Position;
1546 /// use erl_tokenize::tokens::VariableToken;
1547 ///
1548 /// let pos = Position::new();
1549 ///
1550 /// assert_eq!(VariableToken::from_text("Foo", pos.clone()).unwrap().text(), "Foo");
1551 /// assert_eq!(VariableToken::from_text("_foo ", pos.clone()).unwrap().text(), "_foo");
1552 /// ```
1553 pub fn text(&self) -> &str {
1554 &self.text
1555 }
1556}
1557impl PositionRange for VariableToken {
1558 fn start_position(&self) -> Position {
1559 self.pos.clone()
1560 }
1561 fn end_position(&self) -> Position {
1562 self.pos.clone().step_by_width(self.text.len())
1563 }
1564}
1565impl fmt::Display for VariableToken {
1566 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1567 self.text().fmt(f)
1568 }
1569}
1570
/// Whitespace token.
///
/// A token represents exactly one whitespace character; only its kind and start
/// position are stored, and the text is derived from the kind.
///
/// # Examples
///
/// ```
/// use erl_tokenize::Position;
/// use erl_tokenize::tokens::WhitespaceToken;
/// use erl_tokenize::values::Whitespace;
///
/// let pos = Position::new();
///
/// // Ok
/// assert_eq!(WhitespaceToken::from_text(" ", pos.clone()).unwrap().value(), Whitespace::Space);
/// assert_eq!(WhitespaceToken::from_text("\t ", pos.clone()).unwrap().value(), Whitespace::Tab);
///
/// // Err
/// assert!(WhitespaceToken::from_text("foo", pos.clone()).is_err());
/// ```
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct WhitespaceToken {
    /// Kind of whitespace; `text()` returns its `as_str()` form.
    value: Whitespace,
    /// Position at which the token starts.
    pos: Position,
}
1594impl WhitespaceToken {
1595 /// Makes a new `WhitespaceToken` instance from the value.
1596 ///
1597 /// # Examples
1598 ///
1599 /// ```
1600 /// use erl_tokenize::Position;
1601 /// use erl_tokenize::tokens::WhitespaceToken;
1602 /// use erl_tokenize::values::Whitespace;
1603 ///
1604 /// let pos = Position::new();
1605 /// assert_eq!(WhitespaceToken::from_value(Whitespace::Space, pos.clone()).text(), " ");
1606 /// ```
1607 pub fn from_value(value: Whitespace, pos: Position) -> Self {
1608 WhitespaceToken { value, pos }
1609 }
1610
1611 /// Tries to convert from any prefixes of the text to a `WhitespaceToken`.
1612 pub fn from_text(text: &str, pos: Position) -> Result<Self> {
1613 let value = if let Some(c) = text.chars().next() {
1614 match c {
1615 ' ' => Whitespace::Space,
1616 '\t' => Whitespace::Tab,
1617 '\r' => Whitespace::Return,
1618 '\n' => Whitespace::Newline,
1619 '\u{a0}' => Whitespace::NoBreakSpace,
1620 _ => return Err(Error::invalid_whitespace_token(pos)),
1621 }
1622 } else {
1623 return Err(Error::invalid_whitespace_token(pos));
1624 };
1625 Ok(WhitespaceToken { value, pos })
1626 }
1627
1628 /// Returns the value of this token.
1629 ///
1630 /// # Examples
1631 ///
1632 /// ```
1633 /// use erl_tokenize::Position;
1634 /// use erl_tokenize::tokens::WhitespaceToken;
1635 /// use erl_tokenize::values::Whitespace;
1636 ///
1637 /// let pos = Position::new();
1638 ///
1639 /// assert_eq!(WhitespaceToken::from_text(" ", pos.clone()).unwrap().value(),
1640 /// Whitespace::Space);
1641 /// assert_eq!(WhitespaceToken::from_text("\t ", pos.clone()).unwrap().value(),
1642 /// Whitespace::Tab);
1643 /// ```
1644 pub fn value(&self) -> Whitespace {
1645 self.value
1646 }
1647
1648 /// Returns the original textual representation of this token.
1649 ///
1650 /// # Examples
1651 ///
1652 /// ```
1653 /// use erl_tokenize::Position;
1654 /// use erl_tokenize::tokens::WhitespaceToken;
1655 ///
1656 /// let pos = Position::new();
1657 ///
1658 /// assert_eq!(WhitespaceToken::from_text(" ", pos.clone()).unwrap().text(), " ");
1659 /// assert_eq!(WhitespaceToken::from_text("\t ", pos.clone()).unwrap().text(), "\t");
1660 /// ```
1661 pub fn text(&self) -> &'static str {
1662 self.value.as_str()
1663 }
1664}
1665impl PositionRange for WhitespaceToken {
1666 fn start_position(&self) -> Position {
1667 self.pos.clone()
1668 }
1669 fn end_position(&self) -> Position {
1670 self.pos.clone().step_by_text(self.text())
1671 }
1672}
1673impl fmt::Display for WhitespaceToken {
1674 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1675 self.text().fmt(f)
1676 }
1677}