1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
use std::path::Path; use {Result, Token, Position, PositionRange}; /// Tokenizer. /// /// This is an iterator which tokenizes Erlang source code and iterates on the resulting tokens. /// /// # Examples /// /// ``` /// use erl_tokenize::Tokenizer; /// /// let src = r#"io:format("Hello")."#; /// let tokens = Tokenizer::new(src).collect::<Result<Vec<_>, _>>().unwrap(); /// /// assert_eq!(tokens.iter().map(|t| t.text()).collect::<Vec<_>>(), /// ["io", ":", "format", "(", r#""Hello""#, ")", "."]); /// ``` #[derive(Debug)] pub struct Tokenizer<T> { text: T, next_pos: Position, } impl<T> Tokenizer<T> where T: AsRef<str>, { /// Makes a new `Tokenizer` instance which tokenize the Erlang source code text. pub fn new(text: T) -> Self { let init_pos = Position::new(); Tokenizer { text, next_pos: init_pos.clone(), } } /// Sets the file path of the succeeding tokens. pub fn set_filepath<P: AsRef<Path>>(&mut self, filepath: P) { self.next_pos.set_filepath(filepath); } /// Returns the input text. pub fn text(&self) -> &str { self.text.as_ref() } /// Finishes tokenization and returns the target text. pub fn finish(self) -> T { self.text } /// Returns the cursor position from which this tokenizer will start to scan the next token. /// /// # Examples /// /// ``` /// use erl_tokenize::Tokenizer; /// /// let src = r#"io:format( /// "Hello")."#; /// /// let mut tokenizer = Tokenizer::new(src); /// assert_eq!(tokenizer.next_position().offset(), 0); /// /// assert_eq!(tokenizer.next().unwrap().map(|t| t.text().to_owned()).unwrap(), "io"); /// assert_eq!(tokenizer.next_position().offset(), 2); /// tokenizer.next(); // ':' /// tokenizer.next(); // 'format' /// tokenizer.next(); // '(' /// tokenizer.next(); // '\n' /// assert_eq!(tokenizer.next_position().offset(), 11); /// assert_eq!(tokenizer.next_position().line(), 2); /// assert_eq!(tokenizer.next_position().column(), 1); /// assert_eq!(tokenizer.next().unwrap().map(|t| t.text().to_owned()).unwrap(), " "); /// assert_eq!(tokenizer.next_position().offset(), 12); /// assert_eq!(tokenizer.next_position().line(), 2); /// assert_eq!(tokenizer.next_position().column(), 2); /// ``` pub fn next_position(&self) -> Position { self.next_pos.clone() } } impl<T> Iterator for Tokenizer<T> where T: AsRef<str>, { type Item = Result<Token>; fn next(&mut self) -> Option<Self::Item> { if self.next_pos.offset() >= self.text.as_ref().len() { None } else { let text = unsafe { self.text.as_ref().slice_unchecked( self.next_pos.offset(), self.text.as_ref().len(), ) }; let cur_pos = self.next_pos.clone(); match track!(Token::from_text(text, cur_pos)) { Err(e) => Some(Err(e)), Ok(t) => { self.next_pos = t.end_position(); Some(Ok(t)) } } } } }