wz_utf8/
lib.rs

1//! UTF-8 counters for [wz]
2//!
3//! [wz]: https://crates.io/crates/wz
4#![no_std]
5use wz_core::*;
6
/// Byte counter for UTF-8 encoded byte slices
///
/// Every slice handed to `count` adds its length to a running total, which
/// `output` then passes to a `BytesCollector`.
///
/// ```
/// use wz_core::Counter;
/// use wz_utf8::Bytes;
///
/// let counter = Bytes::default();
/// ```
#[derive(Clone, Debug, Default)]
pub struct Bytes {
    /// Running total of bytes seen across all `count` calls
    n: usize,
}
19
20impl<T> Counter<T> for Bytes
21where
22    T: BytesCollector,
23{
24    fn count(&mut self, bytes: &[u8]) {
25        self.n += bytes.len();
26    }
27
28    fn output(&self, collector: &mut T) {
29        collector.collect(self.n)
30    }
31}
32
/// Character counter for UTF-8 encoded byte slices
///
/// Characters are counted with `bytecount::num_chars`; the per-slice results
/// are summed and reported to a `CharsCollector` by `output`.
///
/// ```
/// use wz_core::Counter;
/// use wz_utf8::Chars;
///
/// let counter = Chars::default();
/// ```
#[derive(Clone, Debug, Default)]
pub struct Chars {
    /// Running total of characters seen across all `count` calls
    n: usize,
}
45
46impl<T> Counter<T> for Chars
47where
48    T: CharsCollector,
49{
50    fn count(&mut self, bytes: &[u8]) {
51        self.n += bytecount::num_chars(bytes)
52    }
53
54    fn output(&self, count: &mut T) {
55        count.collect(self.n);
56    }
57}
58
/// Line counter for UTF-8 encoded byte slices
///
/// A line is counted for every occurrence of the configured line break byte.
/// There is no `Default` implementation: the line break byte must be chosen
/// through one of the constructors.
///
/// ```
/// use wz_core::Counter;
/// use wz_utf8::Lines;
///
/// let counter = Lines::with_linebreak(b'\n');
/// ```
#[derive(Clone, Debug)]
pub struct Lines {
    /// Running total of line break bytes seen across all `count` calls
    n: usize,
    /// The byte whose occurrences are counted as line breaks
    line_break: u8,
}
72
73impl Lines {
74    /// Creates a new Lines counter that counts `line_break` bytes
75    pub fn with_linebreak(line_break: u8) -> Self {
76        Self { n: 0, line_break }
77    }
78    /// Creates a new Lines counter that counts `\n` bytes
79    pub fn line_feed() -> Self {
80        Self::with_linebreak(b'\n')
81    }
82    /// Creates a new Lines counter that counts `\r` bytes
83    pub fn carriage_return() -> Self {
84        Self::with_linebreak(b'\r')
85    }
86}
87
88impl<T> Counter<T> for Lines
89where
90    T: LinesCollector,
91{
92    fn count(&mut self, bytes: &[u8]) {
93        self.n += bytecount::count(bytes, self.line_break)
94    }
95
96    fn output(&self, collector: &mut T) {
97        collector.collect(self.n);
98    }
99}
/// Word counter for UTF-8 encoded byte slices
///
/// A word boundary is defined in `isspace(3)`
///
/// The counter keeps its position ("inside a word" or not) between calls to
/// `count`, so a word split across two slices is counted once.
///
/// ```
/// use wz_core::Counter;
/// use wz_utf8::Words;
///
/// let counter = Words::default();
/// ```
#[derive(Clone, Debug, Default)]
pub struct Words {
    /// Number of words that have already ended, i.e. were followed by a
    /// whitespace byte
    n: usize,
    /// Whether the most recently processed byte was a non-whitespace byte;
    /// a still-open word is added to the total by `output`
    on_word: bool,
}
115
116impl<T> Counter<T> for Words
117where
118    T: WordsCollector,
119{
120    fn count(&mut self, bytes: &[u8]) {
121        *self = bytes.iter().fold(self.clone(), |acc, next| {
122            // matches!(x, 0x20 | 0x09 | 0x0A..=0x0D) == ISSPACE
123            let on_word = !matches!(next, 0x20 | 0x09 | 0x0A..=0x0D);
124            let n = acc.n + usize::from(acc.on_word && !on_word);
125            Self { n, on_word }
126        });
127    }
128
129    fn output(&self, counter: &mut T) {
130        counter.collect(self.n + usize::from(self.on_word));
131    }
132}
133
/// Max line length counter for UTF-8 encoded byte slices
///
/// Line length is measured in characters, as counted by [`Chars`], and lines
/// are delimited by the configured line break byte. There is no `Default`
/// implementation: the line break byte must be chosen through one of the
/// constructors.
///
/// ```
/// use wz_core::Counter;
/// use wz_utf8::MaxLineLength;
///
/// let counter = MaxLineLength::with_linebreak(b'\n');
/// ```
#[derive(Clone, Debug)]
pub struct MaxLineLength {
    /// Longest completed line seen so far, in characters
    max: usize,
    /// Character count of the current, not yet terminated line
    character_counter: Chars,
    /// The byte that terminates a line
    line_break: u8,
}
148
149impl MaxLineLength {
150    /// Creates a new MaxLineLength counter that looks for line_break bytes
151    pub fn with_linebreak(line_break: u8) -> Self {
152        Self {
153            max: 0,
154            line_break,
155            character_counter: Default::default(),
156        }
157    }
158    /// Creates a new MaxLineLength counter that looks  for '\n'
159    pub fn line_feed() -> Self {
160        Self::with_linebreak(b'\n')
161    }
162    /// Creates a new MaxLineLength counter that looks for '\r'
163    pub fn carriage_return() -> Self {
164        Self::with_linebreak(b'\r')
165    }
166}
167
168impl<T> Counter<T> for MaxLineLength
169where
170    T: MaxLineLengthCollector,
171{
172    fn count(&mut self, input: &[u8]) {
173        let mut index = 0;
174
175        while let Some(offset_index) = memchr::memchr(self.line_break, &input[index..]) {
176            Counter::<usize>::count(
177                &mut self.character_counter,
178                &input[index..offset_index + index],
179            );
180            let mut chars = 0;
181            self.character_counter.output(&mut chars);
182            index += offset_index + 1;
183            self.max = core::cmp::max(self.max, chars);
184            self.character_counter = Default::default();
185        }
186        Counter::<usize>::count(&mut self.character_counter, &input[index..]);
187    }
188
189    fn output(&self, collector: &mut T) {
190        let mut chars = 0;
191        self.character_counter.output(&mut chars);
192        let count = core::cmp::max(self.max, chars);
193        collector.collect(count)
194    }
195}