xkbcommon_rs/
lexer_utils.rs

1//based on part of scanner-utils.h
2/*
3 * Copyright © 2012 Ran Benita <ran234@gmail.com>
4 * Copyright © 2024 wysiwys
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23 * DEALINGS IN THE SOFTWARE.
24 */
25
26pub(crate) fn check_supported_char_encoding(s: &str) -> Result<&str, ()> {
27    if s.len() < 2 {
28        return Ok(s);
29    }
30
31    let bom = "\u{feff}";
32    if let Some(s) = s.strip_prefix(bom) {
33        return Ok(s);
34    }
35
36    // early detection of wrong file encoding, e.g. UTF-16 or UTF-32
37
38    let mut c = s.chars();
39    let first_char = c.next().unwrap();
40    let second_char = c.next().unwrap();
41
42    // TODO: is this necessary?
43    if first_char == '\0' || second_char == '\0' {
44        // TODO: handle case where first char not '\0'
45        log::error!("Unexpected null character");
46        return Err(());
47    }
48
49    // enforce the first character to be ASCII
50    if !first_char.is_ascii() {
51        log::error!("Unexpected non-ASCII character.");
52        return Err(());
53    }
54
55    Ok(s)
56}