css_parser/
lexer.rs

1use source_map::{SourceId, Span};
2use tokenizer_lib::{Token, TokenSender};
3
4use crate::ParseError;
5
/// A lexical token produced by the CSS lexer.
///
/// Variants carrying a `String` own a copy of the relevant source text, so the
/// whole enum is cheaply derivable as `Clone`.
#[derive(PartialEq, Eq, Debug, Clone)]
pub enum CSSToken {
    /// An identifier made of ASCII letters, digits and `-`, e.g. `font-size`
    Ident(String),
    /// The text of a `/* ... */` comment, without the delimiters
    Comment(String),
    /// HashPrefixedValue. Is a separate member to prevent lexing #0f5421 as a number.
    /// The stored text does not include the leading `#`.
    /// e.g #my-idx, #ffffff
    HashPrefixedValue(String),
    /// A run of digits and `.` characters, kept as source text, e.g 42
    Number(String),
    /// The contents of a double quoted string, without the surrounding quotes.
    /// e.g "SF Pro Display"
    String(String),
    /// `{`
    OpenCurly,
    /// `}`
    CloseCurly,
    /// `(`
    OpenBracket,
    /// `)`
    CloseBracket,
    /// `:`
    Colon,
    /// `;`
    SemiColon,
    /// `.`
    Dot,
    /// `>`
    CloseAngle,
    /// `,`
    Comma,
    /// `*`
    Asterisk,
    /// `%`
    Percentage,
    /// END of source
    EOS,
}
31
32/// Lexes the source returning CSSToken sequence
33/// byte_offset marks spans
34pub fn lex_source(
35    source: &str,
36    sender: &mut impl TokenSender<CSSToken, Span>,
37    source_id: SourceId,
38    start_offset: Option<usize>,
39) -> Result<(), ParseError> {
40    #[derive(PartialEq)]
41    enum ParsingState {
42        Ident,
43        Number,
44        /// Used to decide whether class identifier or number
45        Dot,
46        String {
47            escaped: bool,
48        },
49        HashPrefixedValue,
50        Comment {
51            found_asterisk: bool,
52        },
53        None,
54    }
55
56    let mut state = ParsingState::None;
57
58    // Used for getting string slices from source
59    let mut start = 0;
60    let start_offset = start_offset.unwrap_or_default();
61
62    for (idx, chr) in source.char_indices() {
63        macro_rules! set_state {
64            ($s:expr) => {{
65                start = idx;
66                state = $s;
67            }};
68        }
69
70        macro_rules! push_token {
71            ($t:expr) => {{
72                if !sender.push(Token($t, current_position!())) {
73                    return Ok(());
74                };
75            }};
76        }
77
78        macro_rules! current_position {
79            () => {
80                Span {
81                    start: start_offset + start,
82                    end: idx,
83                    source_id,
84                }
85            };
86        }
87
88        match state {
89            ParsingState::Ident => match chr {
90                'A'..='Z' | 'a'..='z' | '0'..='9' | '-' => {}
91                _ => {
92                    push_token!(CSSToken::Ident(source[start..idx].to_owned()));
93                    set_state!(ParsingState::None);
94                }
95            },
96            ParsingState::HashPrefixedValue => match chr {
97                'A'..='Z' | 'a'..='z' | '0'..='9' | '-' => {}
98                _ => {
99                    push_token!(CSSToken::HashPrefixedValue(
100                        source[(start + 1)..idx].to_owned()
101                    ));
102                    set_state!(ParsingState::None);
103                }
104            },
105            ParsingState::Dot => {
106                if matches!(chr, '0'..='9') {
107                    state = ParsingState::Number;
108                } else {
109                    push_token!(CSSToken::Dot);
110                    set_state!(ParsingState::Ident);
111                }
112            }
113            ParsingState::Number => match chr {
114                '0'..='9' | '.' => {}
115                _ => {
116                    push_token!(CSSToken::Number(source[start..idx].to_owned()));
117                    set_state!(ParsingState::None);
118                }
119            },
120            ParsingState::String { ref mut escaped } => match chr {
121                '\\' => {
122                    *escaped = true;
123                }
124                '"' if !*escaped => {
125                    push_token!(CSSToken::String(source[(start + 1)..idx].to_owned()));
126                    set_state!(ParsingState::None);
127                    continue;
128                }
129                _ => *escaped = false,
130            },
131            ParsingState::Comment {
132                ref mut found_asterisk,
133            } => match chr {
134                '/' if *found_asterisk => {
135                    push_token!(CSSToken::Comment(source[(start + 2)..(idx - 1)].to_owned()));
136                    set_state!(ParsingState::None);
137                    continue;
138                }
139                chr => {
140                    *found_asterisk = chr == '*';
141                }
142            },
143            ParsingState::None => {}
144        }
145
146        if state == ParsingState::None {
147            match chr {
148                'A'..='Z' | 'a'..='z' => set_state!(ParsingState::Ident),
149                '/' => set_state!(ParsingState::Comment {
150                    found_asterisk: true
151                }),
152                '.' => set_state!(ParsingState::Dot),
153                '"' => set_state!(ParsingState::String { escaped: false }),
154                '#' => set_state!(ParsingState::HashPrefixedValue),
155                '0'..='9' => set_state!(ParsingState::Number),
156                chr if chr.is_whitespace() => {
157                    continue;
158                }
159                chr => {
160                    let token = match chr {
161                        '{' => CSSToken::OpenCurly,
162                        '}' => CSSToken::CloseCurly,
163                        '(' => CSSToken::OpenBracket,
164                        ')' => CSSToken::CloseBracket,
165                        ':' => CSSToken::Colon,
166                        ';' => CSSToken::SemiColon,
167                        ',' => CSSToken::Comma,
168                        '>' => CSSToken::CloseAngle,
169                        '.' => CSSToken::Dot,
170                        '*' => CSSToken::Asterisk,
171                        '%' => CSSToken::Percentage,
172                        chr => {
173                            return Err(ParseError {
174                                reason: format!("Invalid character '{}'", chr),
175                                position: current_position!(),
176                            })
177                        }
178                    };
179                    start = idx;
180                    push_token!(token);
181                    continue;
182                }
183            }
184        }
185    }
186
187    let end_of_source = source.len();
188
189    match state {
190        ParsingState::Ident => {
191            sender.push(Token(
192                CSSToken::Ident(source[start..].to_owned()),
193                Span {
194                    start,
195                    end: end_of_source,
196                    source_id,
197                },
198            ));
199        }
200        ParsingState::Number => {
201            sender.push(Token(
202                CSSToken::Number(source[start..].to_owned()),
203                Span {
204                    start,
205                    end: end_of_source,
206                    source_id,
207                },
208            ));
209        }
210        ParsingState::HashPrefixedValue => {
211            sender.push(Token(
212                CSSToken::HashPrefixedValue(source[(start + 1)..].to_owned()),
213                Span {
214                    start,
215                    end: end_of_source,
216                    source_id,
217                },
218            ));
219        }
220        ParsingState::Comment { .. } => {
221            return Err(ParseError {
222                reason: "Could not find end to comment".to_owned(),
223                position: Span {
224                    start,
225                    end: end_of_source,
226                    source_id,
227                },
228            })
229        }
230        ParsingState::String { .. } => {
231            return Err(ParseError {
232                reason: "Could not find end to string".to_owned(),
233                position: Span {
234                    start,
235                    end: end_of_source,
236                    source_id,
237                },
238            })
239        }
240        ParsingState::Dot => {
241            return Err(ParseError {
242                reason: "Found trailing \".\"".to_owned(),
243                position: Span {
244                    start,
245                    end: end_of_source,
246                    source_id,
247                },
248            })
249        }
250        ParsingState::None => {}
251    }
252
253    sender.push(Token(
254        CSSToken::EOS,
255        Span {
256            start: end_of_source,
257            end: end_of_source,
258            source_id,
259        },
260    ));
261
262    Ok(())
263}