1pub use self::CatTokenKind::*;
15use moore_common::source::*;
16
17pub struct Cat<'a> {
22 iter: Box<CharIter<'a>>,
23 last: usize,
24 chars: (Option<char>, Option<char>),
25 indices: (usize, usize),
26}
27
28impl<'a> Cat<'a> {
29 pub fn new(mut iter: Box<CharIter<'a>>) -> Cat<'a> {
31 let last = iter
32 .size_hint()
33 .1
34 .expect("Iterator must provide upper bounds");
35 let c0 = iter.next();
36 let c1 = iter.next();
37 Cat {
38 iter: iter,
39 last: last,
40 chars: (c0.map(|x| x.1), c1.map(|x| x.1)),
41 indices: (
42 c0.map(|x| x.0).unwrap_or(last),
43 c1.map(|x| x.0).unwrap_or(last),
44 ),
45 }
46 }
47
48 fn bump(&mut self) {
50 let c = self.iter.next();
51 self.chars = (self.chars.1, c.map(|x| x.1));
52 self.indices = (self.indices.1, c.map(|x| x.0).unwrap_or(self.last));
53 }
54}
55
56impl<'a> Iterator for Cat<'a> {
57 type Item = CatToken;
58
59 fn next(&mut self) -> Option<Self::Item> {
60 match self.chars {
61 (None, _) => None,
62
63 (Some('\n'), _) => {
65 let t = CatToken(Newline, self.indices.0, self.indices.1);
66 self.bump();
67 Some(t)
68 }
69
70 (Some(c), _) if is_whitespace(c) => {
72 let p0 = self.indices.0;
73 while let (Some(c), _) = self.chars {
74 if !is_whitespace(c) {
75 break;
76 }
77 self.bump();
78 }
79 Some(CatToken(Whitespace, p0, self.indices.0))
80 }
81
82 (Some('/'), Some('/')) => {
85 let p0 = self.indices.0;
86 while let (Some(c), _) = self.chars {
87 if c == '\n' {
88 break;
89 }
90 self.bump();
91 }
92 Some(CatToken(Comment, p0, self.indices.0))
93 }
94
95 (Some('/'), Some('*')) => {
97 let p0 = self.indices.0;
98 while let (Some(c0), Some(c1)) = self.chars {
99 if c0 == '*' && c1 == '/' {
100 self.bump();
101 self.bump();
102 break;
103 }
104 self.bump();
105 }
106 Some(CatToken(Comment, p0, self.indices.0))
107 }
108
109 (Some(c), _) if is_symbol(c) => {
112 let t = CatToken(Symbol(c), self.indices.0, self.indices.1);
113 self.bump();
114 Some(t)
115 }
116
117 (Some(c), _) if is_digit(c) => {
119 let p0 = self.indices.0;
120 while let (Some(c), _) = self.chars {
121 if !is_digit(c) {
122 break;
123 }
124 self.bump();
125 }
126 Some(CatToken(Digits, p0, self.indices.0))
127 }
128
129 (Some(_), _) => {
131 let p0 = self.indices.0;
132 while let (Some(c), _) = self.chars {
133 if c == '\n' || is_whitespace(c) || is_symbol(c) {
134 break;
135 }
136 self.bump();
137 }
138 Some(CatToken(Text, p0, self.indices.0))
139 }
140 }
141 }
142}
143
/// Returns `true` for a space, a horizontal tab, a carriage return, or a
/// no-break space (U+00A0). A line feed is deliberately not included.
fn is_whitespace(c: char) -> bool {
    match c {
        ' ' | '\t' | '\r' | '\u{A0}' => true,
        _ => false,
    }
}
149
/// Returns `true` if `c` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(c: char) -> bool {
    match c {
        '0'..='9' => true,
        _ => false,
    }
}
154
/// Returns `true` if `c` is one of the punctuation characters that the
/// tokenizer emits as a stand-alone symbol.
fn is_symbol(c: char) -> bool {
    // Every character that forms a symbol token on its own.
    const SYMBOLS: &str = "()[]{}#:;.,=+-*/~|<>!%^&?'\"`$\\@";
    SYMBOLS.contains(c)
}
165
166#[derive(Clone, Copy, PartialEq, Eq, Debug)]
168pub struct CatToken(pub CatTokenKind, pub usize, pub usize);
169
/// The categories a `CatToken` can belong to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CatTokenKind {
    /// A single line feed character.
    Newline,
    /// A run of whitespace characters other than line feeds.
    Whitespace,
    /// A `//` line comment or a `/* ... */` block comment.
    Comment,
    /// A single punctuation character, carried as the payload.
    Symbol(char),
    /// A run of characters that fits none of the other categories.
    Text,
    /// A run of ASCII decimal digits.
    Digits,
    /// End of input. The `Cat` iterator in this file signals exhaustion by
    /// returning `None` and does not emit this kind itself.
    Eof,
}
181
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the tokenizer over `input` and gathers every token it yields.
    fn lex(input: &str) -> Vec<CatToken> {
        let source = Box::new(input.char_indices());
        Cat::new(source).collect()
    }

    /// Empty input yields no tokens at all.
    #[test]
    fn empty() {
        let expected: Vec<CatToken> = Vec::new();
        assert_eq!(lex(""), expected);
    }

    /// Mixed multi-byte text, whitespace, digits and a trailing newline are
    /// split into tokens whose bounds match the offsets from `char_indices`.
    #[test]
    fn non_empty() {
        let expected = vec![
            CatToken(Text, 0, 5),
            CatToken(Whitespace, 5, 6),
            CatToken(Text, 6, 12),
            CatToken(Whitespace, 12, 13),
            CatToken(Digits, 13, 17),
            CatToken(Text, 17, 25),
            CatToken(Newline, 25, 26),
        ];
        assert_eq!(lex("Löwe 老虎 1234Léopard\n"), expected);
    }
}