unicode_names2_generator/
util.rs

/// Returns the width in bytes (1, 2 or 4) of the narrowest unsigned integer type among `u8`,
/// `u16` and `u32` that can represent every value produced by `x`.
///
/// An empty iterator is treated as if its largest value were `0`, so it yields `1`.
pub fn smallest_type<I: Iterator<Item = u32>>(x: I) -> usize {
    // Only the largest value matters: if it fits, everything else does too.
    let largest = x.max().unwrap_or(0);
    match largest {
        0..=0xFF => 1,
        0x100..=0xFFFF => 2,
        _ => 4,
    }
}
12
13pub fn smallest_u<I: Iterator<Item = u32>>(x: I) -> String {
14    format!("u{}", 8 * smallest_type(x))
15}
16pub fn split<'a, 'b>(s: &'a str, splitters: &'b [u8]) -> Split<'a, 'b> {
17    Split {
18        s,
19        splitters,
20        pending: "",
21        done: false,
22    }
23}
24
/// Iterator that tokenizes a string on spaces and on a caller-supplied set of splitter bytes.
///
/// * A space ends the current token and is discarded.
/// * Any other splitter byte ends the current token and is then yielded as a one-byte token
///   of its own on the following call.
/// * Adjacent delimiters, or a delimiter at either end of the input, produce empty tokens.
///
/// # Panics
///
/// Tokens are produced by slicing the input at the byte offset of a matched splitter, so a
/// non-ASCII splitter byte that matches inside a multi-byte UTF-8 sequence makes `next` panic.
pub struct Split<'a, 'b> {
    /// Portion of the input that has not been consumed yet.
    s: &'a str,
    /// Bytes, in addition to `b' '`, that terminate a token.
    splitters: &'b [u8],
    /// Splitter found by the previous call that still has to be yielded; empty when none.
    pending: &'a str,
    /// Set once the final token has been handed out.
    done: bool,
}

impl<'a, 'b> Iterator for Split<'a, 'b> {
    type Item = &'a str;

    /// Yields the next token, or `None` after the final token has been returned.
    fn next(&mut self) -> Option<&'a str> {
        if self.done {
            return None;
        }
        // Emit a queued splitter before testing for exhausted input; otherwise a splitter that
        // is the last byte of the input (e.g. the `-` in `"a-"`) would be silently dropped.
        if !self.pending.is_empty() {
            return Some(std::mem::take(&mut self.pending));
        }
        if self.s.is_empty() {
            // The input ended right after a delimiter (or was empty from the start): report
            // the empty token that follows it, then stop.
            self.done = true;
            return Some("");
        }

        let rest = self.s;
        let splitters = self.splitters;
        let delimiter_at = rest
            .bytes()
            .position(|b| b == b' ' || splitters.contains(&b));

        match delimiter_at {
            Some(i) => {
                // Spaces are dropped; every other splitter is queued for the next call.
                if rest.as_bytes()[i] != b' ' {
                    self.pending = &rest[i..i + 1];
                }
                self.s = &rest[i + 1..];
                Some(&rest[..i])
            }
            None => {
                // No delimiter left: everything that remains is the final token.
                self.done = true;
                Some(rest)
            }
        }
    }
}
61
/// Checks `split` with `-` as the only extra splitter: spaces are dropped, hyphens come back
/// as their own tokens, and adjacent or leading/trailing delimiters produce empty tokens.
#[test]
fn test_split() {
    // (input, expected tokens)
    let cases: &[(&str, &[&str])] = &[
        ("a", &["a"]),
        ("a b", &["a", "b"]),
        (" a b ", &["", "a", "b", ""]),
        ("a-b", &["a", "-", "b"]),
        ("a- b", &["a", "-", "", "b"]),
        ("a -b", &["a", "", "-", "b"]),
    ];

    for &(input, expected) in cases {
        let tokens: Vec<&str> = split(input, b"-").collect();
        assert_eq!(tokens, expected);
    }
}