azul_simplecss/
tokenizer.rs1use stream;
6use stream::Stream;
7use error::Error;
8
/// Relationship between two adjacent simple selectors.
#[derive(Debug, PartialEq)]
pub enum Combinator {
    /// Whitespace between two selectors: `a b`.
    Space,
    /// A `>` between two selectors: `a > b`.
    GreaterThan,
    /// A `+` between two selectors: `a + b`.
    Plus,
    /// A `~` between two selectors: `a ~ b`.
    Tilde,
}
21
22#[derive(PartialEq,Debug)]
24pub enum Token<'a> {
25 UniversalSelector,
29 TypeSelector(&'a str),
33 IdSelector(&'a str),
39 ClassSelector(&'a str),
45 AttributeSelector(&'a str),
51 PseudoClass { selector: &'a str, value: Option<&'a str> },
58 Combinator(Combinator),
60 Comma,
64 BlockStart,
70 BlockEnd,
76 Declaration(&'a str, &'a str),
82 AtRule(&'a str),
85 DeclarationStr(&'a str),
87 AtStr(&'a str),
89 DoublePseudoClass { selector: &'a str, value: Option<&'a str> },
91 EndOfStream,
95}
96
/// Which part of the stylesheet the tokenizer is currently inside.
#[derive(PartialEq)]
enum State {
    /// Outside of any block: selectors and at-rules are expected.
    Rule,
    /// Inside a `{ ... }` block: declarations are expected.
    Declaration,
    /// Inside a block that is itself nested in a declaration block.
    DeclarationRule,
}
103
104pub struct Tokenizer<'a> {
106 stream: Stream<'a>,
107 state: State,
108 after_selector: bool,
109 has_at_rule: bool,
110 at_start: bool,
111}
112
113impl<'a> Tokenizer<'a> {
114 pub fn new(text: &str) -> Tokenizer {
116 Tokenizer {
117 stream: Stream::new(text.as_bytes()),
118 state: State::Rule,
119 after_selector: false,
120 has_at_rule: false,
121 at_start: true,
122 }
123 }
124
125 pub fn new_bound(text: &str, start: usize, end: usize) -> Tokenizer {
133 Tokenizer {
134 stream: Stream::new_bound(text.as_bytes(), start, end),
135 state: State::Rule,
136 after_selector: false,
137 has_at_rule: false,
138 at_start: true,
139 }
140 }
141
142 pub fn pos(&self) -> usize {
144 self.stream.pos()
145 }
146
147 pub fn parse_next(&mut self) -> Result<Token<'a>, Error> {
149 if self.at_start {
150 self.stream.skip_spaces();
151 self.at_start = false;
152 }
153
154 if self.stream.at_end() {
155 return Ok(Token::EndOfStream);
156 }
157
158 match self.state {
159 State::Rule => self.consume_rule(),
160 State::Declaration => self.consume_declaration(),
161 State::DeclarationRule => self.consume_declaration(),
162 }
163 }
164
165 fn consume_rule(&mut self) -> Result<Token<'a>, Error> {
166 match self.stream.curr_char_raw() {
167 b'@' => {
168 self.after_selector = true;
169 self.has_at_rule = true;
170 self.stream.advance_raw(1);
171 let s = self.consume_ident()?;
172 return Ok(Token::AtRule(s));
173 }
174 b'#' => {
175 self.after_selector = true;
176 self.has_at_rule = false;
177 self.stream.advance_raw(1);
178 let s = try!(self.consume_ident());
179 return Ok(Token::IdSelector(s));
180 }
181 b'.' => {
182 self.after_selector = true;
183 self.has_at_rule = false;
184 self.stream.advance_raw(1);
185 let s = try!(self.consume_ident());
186 return Ok(Token::ClassSelector(s));
187 }
188 b'*' => {
189 self.after_selector = true;
190 self.has_at_rule = false;
191 self.stream.advance_raw(1);
192 self.stream.skip_spaces();
193 return Ok(Token::UniversalSelector);
194 }
195 b':' => {
196 self.after_selector = true;
197 self.has_at_rule = false;
198 self.stream.advance_raw(1);
199
200 let is_double_colon = self.stream.is_char_eq(b':')?;
202 if is_double_colon {
203 self.stream.advance_raw(1); }
205
206 let s = try!(self.consume_ident());
207
208 if self.stream.curr_char() == Ok(b'(') {
209 self.stream.advance_raw(1); let inner_len = self.stream.length_to(b')')?;
212 let inner = self.stream.read_raw_str(inner_len);
213 self.stream.advance_raw(1); return Ok(if is_double_colon {
215 Token::DoublePseudoClass { selector: s, value: Some(inner) }
216 } else {
217 Token::PseudoClass { selector: s, value: Some(inner) }
218 });
219 } else {
220 return Ok(if is_double_colon {
221 Token::DoublePseudoClass { selector: s, value: None }
222 } else {
223 Token::PseudoClass { selector: s, value: None }
224 });
225 }
226 }
227 b'[' => {
228 self.after_selector = true;
229 self.has_at_rule = false;
230 self.stream.advance_raw(1);
231 let len = try!(self.stream.length_to(b']'));
232 let s = self.stream.read_raw_str(len);
233 self.stream.advance_raw(1); self.stream.skip_spaces();
235 return Ok(Token::AttributeSelector(s));
236 }
237 b',' => {
238 self.after_selector = false;
239 self.has_at_rule = false;
240 self.stream.advance_raw(1);
241 self.stream.skip_spaces();
242 return Ok(Token::Comma);
243 }
244 b'{' => {
245 self.after_selector = false;
246 self.has_at_rule = false;
247 self.state = State::Declaration;
248 self.stream.advance_raw(1);
249 return Ok(Token::BlockStart);
250 }
251 b'>' => {
252 if self.after_selector {
253 self.after_selector = false;
254 self.has_at_rule = false;
255 self.stream.advance_raw(1);
256 self.stream.skip_spaces();
257 return Ok(Token::Combinator(Combinator::GreaterThan));
258 } else {
259 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
260 }
261 }
262 b'+' => {
263 if self.after_selector {
264 self.after_selector = false;
265 self.has_at_rule = false;
266 self.stream.advance_raw(1);
267 self.stream.skip_spaces();
268 return Ok(Token::Combinator(Combinator::Plus));
269 } else {
270 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
271 }
272 }
273 b'~' => {
274 if self.after_selector {
275 self.after_selector = false;
276 self.has_at_rule = false;
277 self.stream.advance_raw(1);
278 self.stream.skip_spaces();
279 return Ok(Token::Combinator(Combinator::Tilde));
280 } else {
281 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
282 }
283 }
284 b'/' => {
285 if try!(self.consume_comment()) {
286 return self.parse_next();
287 } else {
288 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
289 }
290 }
291 _ => {
292 if self.stream.is_space_raw() {
293 self.stream.skip_spaces();
294
295 if !self.after_selector {
296 return self.parse_next();
297 }
298
299 match self.stream.curr_char()? {
300 b'{' | b'/' | b'>' | b'+' | b'~' | b'*' => { return self.parse_next(); },
301 _ => {
302 self.after_selector = false;
303 if !self.has_at_rule {
304 return Ok(Token::Combinator(Combinator::Space));
305 }
306 }
307 }
308 }
309
310 let s = try!(self.consume_ident());
311 let token_type = if self.has_at_rule {
312 self.has_at_rule = true;
313 Token::AtStr(s)
314 } else {
315 self.has_at_rule = false;
316 Token::TypeSelector(s)
317 };
318
319 self.after_selector = true;
320 return Ok(token_type);
321 }
322 }
323 }
324
325 fn consume_declaration(&mut self) -> Result<Token<'a>, Error> {
326 self.stream.skip_spaces();
327 self.has_at_rule = false;
328
329 match self.stream.curr_char_raw() {
330 b'}' => {
331 if self.state == State::DeclarationRule {
332 self.state = State::Declaration;
333 } else if self.state == State::Declaration {
334 self.state = State::Rule;
335 }
336 self.stream.advance_raw(1);
337 self.stream.skip_spaces();
338 return Ok(Token::BlockEnd);
339 },
340 b'{' => {
341 if self.state == State::Rule {
342 self.state = State::Declaration;
343 } else if self.state == State::Declaration {
344 self.state = State::DeclarationRule;
345 }
346 self.stream.advance_raw(1);
347 self.stream.skip_spaces();
348 return Ok(Token::BlockStart);
349 },
350 b'/' => {
351 if try!(self.consume_comment()) {
352 return self.parse_next();
353 } else {
354 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
355 }
356 }
357 _ => {
358 let name = self.consume_ident()?;
359
360 self.stream.skip_spaces();
361
362 if self.stream.is_char_eq(b'/')? {
363 if !try!(self.consume_comment()) {
364 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
365 }
366 }
367
368 if self.stream.is_char_eq(b'{')? {
369 if name.is_empty() {
370 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
371 } else {
372 return Ok(Token::DeclarationStr(name));
373 }
374 }
375
376 self.stream.advance_raw(1); self.stream.skip_spaces();
378
379 if self.stream.is_char_eq(b'/')? {
380 if !try!(self.consume_comment()) {
381 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
382 }
383 }
384
385 let len = self.stream.length_to_either(&[b';', b'}'])?;
386
387 if len == 0 {
388 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
389 }
390
391 let mut value = self.stream.read_raw_str(len);
392 if let Some(p) = value.as_bytes().iter().rposition(|c| !stream::is_space(*c)) {
394 value = &value[0..(p + 1)];
395 }
396
397 self.stream.skip_spaces();
398 while try!(self.stream.is_char_eq(b';')) {
399 self.stream.advance_raw(1);
400 self.stream.skip_spaces();
401 }
402
403 Ok(Token::Declaration(name, value))
404 }
405 }
406 }
407
408 fn consume_ident(&mut self) -> Result<&'a str, Error> {
409 let start = self.stream.pos();
410
411 while !self.stream.at_end() {
412 if self.stream.is_ident_raw() {
413 try!(self.stream.advance(1));
414 } else {
415 break;
416 }
417 }
418
419 if start == self.stream.pos() {
420 return Err(Error::UnknownToken(self.stream.gen_error_pos()));
421 }
422
423 let s = self.stream.slice_region_raw_str(start, self.stream.pos());
424 Ok(s)
425 }
426
427 fn consume_comment(&mut self) -> Result<bool, Error> {
428 self.stream.advance_raw(1);
429
430 if try!(self.stream.is_char_eq(b'*')) {
431 self.stream.advance_raw(1); while !self.stream.at_end() {
434 let len = try!(self.stream.length_to(b'*'));
435 try!(self.stream.advance(len + 1));
436 if try!(self.stream.is_char_eq(b'/')) {
437 self.stream.advance_raw(1);
438 break;
439 }
440 }
441
442 return Ok(true);
443 } else {
444 return Ok(false);
445 }
446 }
447}