// cssparser/unicode_range.rs

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

//! https://drafts.csswg.org/css-syntax/#urange
6
7use crate::tokenizer::Token;
8use crate::{BasicParseError, Parser, ToCss};
9use std::char;
10use std::fmt;
11
/// One contiguous range of code points.
///
/// Cannot be empty. Represents a single code point when `start == end`.
#[derive(PartialEq, Eq, Clone, Copy, Hash)]
#[repr(C)]
pub struct UnicodeRange {
    /// Inclusive start of the range. In [0, end].
    pub start: u32,

    /// Inclusive end of the range. In [0, 0x10FFFF].
    pub end: u32,
}
24
25impl UnicodeRange {
26    /// https://drafts.csswg.org/css-syntax/#urange-syntax
27    pub fn parse<'i>(input: &mut Parser<'i, '_>) -> Result<Self, BasicParseError<'i>> {
28        // <urange> =
29        //   u '+' <ident-token> '?'* |
30        //   u <dimension-token> '?'* |
31        //   u <number-token> '?'* |
32        //   u <number-token> <dimension-token> |
33        //   u <number-token> <number-token> |
34        //   u '+' '?'+
35
36        input.expect_ident_matching("u")?;
37        let after_u = input.position();
38        parse_tokens(input)?;
39
40        // This deviates from the spec in case there are CSS comments
41        // between tokens in the middle of one <unicode-range>,
42        // but oh well…
43        let concatenated_tokens = input.slice_from(after_u);
44
45        let range = match parse_concatenated(concatenated_tokens.as_bytes()) {
46            Ok(range) => range,
47            Err(()) => {
48                return Err(input
49                    .new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
50            }
51        };
52        if range.end > char::MAX as u32 || range.start > range.end {
53            Err(input.new_basic_unexpected_token_error(Token::Ident(concatenated_tokens.into())))
54        } else {
55            Ok(range)
56        }
57    }
58}
59
60fn parse_tokens<'i>(input: &mut Parser<'i, '_>) -> Result<(), BasicParseError<'i>> {
61    match input.next_including_whitespace()?.clone() {
62        Token::Delim('+') => {
63            match *input.next_including_whitespace()? {
64                Token::Ident(_) => {}
65                Token::Delim('?') => {}
66                ref t => {
67                    let t = t.clone();
68                    return Err(input.new_basic_unexpected_token_error(t));
69                }
70            }
71            parse_question_marks(input)
72        }
73        Token::Dimension { .. } => parse_question_marks(input),
74        Token::Number { .. } => {
75            let after_number = input.state();
76            match input.next_including_whitespace() {
77                Ok(&Token::Delim('?')) => parse_question_marks(input),
78                Ok(&Token::Dimension { .. }) => {}
79                Ok(&Token::Number { .. }) => {}
80                _ => input.reset(&after_number),
81            }
82        }
83        t => return Err(input.new_basic_unexpected_token_error(t)),
84    }
85    Ok(())
86}
87
88/// Consume as many '?' as possible
89fn parse_question_marks(input: &mut Parser) {
90    loop {
91        let start = input.state();
92        match input.next_including_whitespace() {
93            Ok(&Token::Delim('?')) => {}
94            _ => {
95                input.reset(&start);
96                return;
97            }
98        }
99    }
100}
101
102fn parse_concatenated(text: &[u8]) -> Result<UnicodeRange, ()> {
103    let mut text = match text.split_first() {
104        Some((&b'+', text)) => text,
105        _ => return Err(()),
106    };
107    let (first_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?;
108    let question_marks = consume_question_marks(&mut text);
109    let consumed = hex_digit_count + question_marks;
110    if consumed == 0 || consumed > 6 {
111        return Err(());
112    }
113
114    if question_marks > 0 {
115        if text.is_empty() {
116            return Ok(UnicodeRange {
117                start: first_hex_value << (question_marks * 4),
118                end: ((first_hex_value + 1) << (question_marks * 4)) - 1,
119            });
120        }
121    } else if text.is_empty() {
122        return Ok(UnicodeRange {
123            start: first_hex_value,
124            end: first_hex_value,
125        });
126    } else if let Some((&b'-', mut text)) = text.split_first() {
127        let (second_hex_value, hex_digit_count) = consume_hex(&mut text, 6)?;
128        if hex_digit_count > 0 && hex_digit_count <= 6 && text.is_empty() {
129            return Ok(UnicodeRange {
130                start: first_hex_value,
131                end: second_hex_value,
132            });
133        }
134    }
135    Err(())
136}
137
/// Consume hex digits, but return an error if more than `digit_limit` are found.
///
/// On success returns `(value, digit_count)` and advances `text` past the
/// digits that were read; `digit_count` is 0 when `text` does not start with
/// a hex digit. Both upper- and lowercase digits are accepted.
///
/// Returns `Err(())` when a digit beyond `digit_limit` is encountered, or
/// when the accumulated value would not fit in a `u32`.
fn consume_hex(text: &mut &[u8], digit_limit: usize) -> Result<(u32, usize), ()> {
    let mut value: u32 = 0;
    let mut digits = 0;
    while let Some((&byte, rest)) = text.split_first() {
        let digit_value = match (byte as char).to_digit(16) {
            Some(digit_value) => digit_value,
            None => break,
        };
        if digits == digit_limit {
            return Err(());
        }
        // Checked arithmetic: a limit above 8 digits must not overflow silently.
        value = value
            .checked_mul(0x10)
            .and_then(|shifted| shifted.checked_add(digit_value))
            .ok_or(())?;
        digits += 1;
        *text = rest;
    }
    Ok((value, digits))
}
156
/// Counts the run of `?` bytes at the start of `text` and advances `text`
/// past them. Returns 0 and leaves `text` untouched when it does not start
/// with `?`.
fn consume_question_marks(text: &mut &[u8]) -> usize {
    let question_marks = text.iter().take_while(|&&byte| byte == b'?').count();
    *text = &text[question_marks..];
    question_marks
}
165
166impl fmt::Debug for UnicodeRange {
167    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
168        self.to_css(formatter)
169    }
170}
171
172impl ToCss for UnicodeRange {
173    fn to_css<W>(&self, dest: &mut W) -> fmt::Result
174    where
175        W: fmt::Write,
176    {
177        write!(dest, "U+{:X}", self.start)?;
178        if self.end != self.start {
179            write!(dest, "-{:X}", self.end)?;
180        }
181        Ok(())
182    }
183}