1use crate::is_whitespace;
2use std::str::from_utf8;
3
/// Width assigned to any character that has no entry in `WIDTHS`.
///
/// The first byte of the embedded width table must equal this value; the table
/// loader asserts it.
// NOTE(review): the name suggests this is the statistical mode (most common width)
// that the table generator omits to save space -- confirm against the generator.
const MODE_WIDTH: u8 = 10;
5
6lazy_static::lazy_static! {
7 static ref WIDTHS: Vec<(char, u8)> = {
8 use std::io::Read;
9 let mut raw = include_bytes!("character_widths.bin").as_slice();
11
12 let mut mode = [0u8];
14 raw.read(&mut mode).unwrap();
15 let mode = mode[0];
16
17 assert_eq!(mode, MODE_WIDTH);
18
19 let mut widths = Vec::new();
20
21 while !raw.is_empty() {
22 let s = from_utf8(&raw[..1])
25 .or_else(|_| from_utf8(&raw[..2]))
26 .or_else(|_| from_utf8(&raw[..3]))
27 .or_else(|_| from_utf8(&raw[..4]))
28 .unwrap();
29 let c = s.chars().next().unwrap();
30 raw = &raw[c.len_utf8()..];
31
32 let mut len = [0u8];
34 raw.read(&mut len).unwrap();
35 let len = len[0];
36
37 widths.push((c, len));
38 }
39
40 widths
41 };
42}
43
44#[cfg_attr(doc, doc(cfg(feature = "width")))]
52pub fn width(c: char) -> usize {
53 let width = match WIDTHS.binary_search_by_key(&c, |&(c, _)| c) {
54 Ok(idx) => WIDTHS[idx].1,
55 Err(_) => MODE_WIDTH,
56 } as usize;
57
58 width * 100
59}
60
61#[cfg_attr(doc, doc(cfg(feature = "width")))]
65pub fn width_str(s: &str) -> usize {
66 s.chars().map(|c| width(c) / 100).sum::<usize>() / 10
67}
68
/// Line-breaking policy accepted by `width_str_max_unbroken`.
///
/// Marked `#[non_exhaustive]`, so code outside this crate must keep a wildcard arm
/// when matching on it.
#[derive(Clone, Copy, Debug)]
#[non_exhaustive]
pub enum WordBreak {
    /// The only policy currently defined.
    BreakAll,
}
79
80pub fn width_str_max_unbroken(s: &str, _word_break: WordBreak) -> usize {
87 let mut start = 0;
88 break_all_linebreaks(&s)
89 .map(|p| {
90 let unbroken = &s[start..p];
91 start = p;
92 width_str(unbroken.trim_end_matches(is_whitespace))
93 })
94 .max()
95 .unwrap_or(0)
96}
97
98fn break_all_linebreaks(s: &str) -> impl Iterator<Item = usize> + '_ {
101 use finl_unicode::categories::{CharacterCategories, MinorCategory};
102
103 use itertools::Itertools;
104 s.char_indices()
105 .tuple_windows()
106 .filter_map(|((_, c1), (p, c2))| {
107 let c1 = c1.get_minor_category();
108 let c2 = c2.get_minor_category();
109 let break_all = !matches!(c1, MinorCategory::Mn | MinorCategory::Mc)
110 && !matches!(c2, MinorCategory::Mn | MinorCategory::Mc);
111 if break_all
112 || [c1, c2]
113 .into_iter()
114 .any(|c| matches!(c, MinorCategory::Zs | MinorCategory::Zl))
115 {
116 Some(p)
117 } else {
118 None
119 }
120 })
121 .chain(std::iter::once(s.len()))
122}
123
124pub fn trim_to_width(s: &str, mut budget: usize) -> &str {
127 budget *= 10;
129 for (idx, c) in s.char_indices() {
130 match budget.checked_sub(width(c) / 100) {
131 Some(new_budget) => budget = new_budget,
132 None => return &s[..idx],
133 }
134 }
135 return s;
136}
137
#[cfg(test)]
mod test {
    use crate::width::{trim_to_width, width_str, WordBreak};
    use crate::{width, width_str_max_unbroken, CensorStr};
    use serial_test::serial;

    /// Table-driven check of the widest unbreakable segment for short inputs.
    #[test]
    pub fn unbroken() {
        const TESTS: [(&str, usize); 8] = [
            ("", 0),
            ("m", 1),
            ("mm", 1),
            ("m m", 1),
            ("m m", 1),
            ("mm m", 1),
            ("m mm", 1),
            ("m;m", 1),
        ];
        for (s, w) in TESTS {
            assert_eq!(width_str_max_unbroken(s, WordBreak::BreakAll), w, "{s} {w}");
        }
    }

    /// A default-width character measures exactly 1000.
    #[test]
    pub fn m() {
        let measured = width('m');
        assert_eq!(measured, 1000);
    }

    #[test]
    pub fn fdfd() {
        let measured = width('\u{FDFD}');
        assert_eq!(measured, 10300)
    }

    #[test]
    pub fn three_em_dash() {
        let measured = width('⸻');
        assert!(measured >= 2500);
    }

    #[test]
    pub fn lattice() {
        let measured = width('𒐫');
        assert!(measured >= 3000);
    }

    #[test]
    pub fn cuneiform() {
        let measured = width('𒈙');
        assert!(measured >= 3000);
    }

    #[test]
    pub fn javanese() {
        let measured = width('꧅');
        assert!(measured >= 1500);
    }

    /// Runs of this Tamil character must be measured as one unbroken segment.
    #[test]
    pub fn tamil() {
        let short = width_str_max_unbroken("abc ௌௌௌௌ def", WordBreak::BreakAll);
        assert_eq!(short, 10);

        let long = width_str_max_unbroken("abc ௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌௌ", WordBreak::BreakAll);
        assert_eq!(long, 345);
    }

    #[test]
    pub fn emoji() {
        let measured = width_str("😀🐿");
        assert_eq!(measured, 4);
    }

    #[test]
    pub fn cjk() {
        let measured = width_str("大はㅂ");
        assert_eq!(measured, 6)
    }

    #[test]
    pub fn string() {
        let measured = width_str("abc‱DŽဪ");
        assert_eq!(measured, 8);
    }

    #[test]
    #[serial]
    pub fn tall() {
        let censored = "a꧁a".censor();
        assert_eq!(censored, "aa");
    }

    /// Trimming never exceeds the budget and never undershoots it by more than 15.
    #[test]
    #[serial]
    pub fn trim() {
        for (input, budget, expected) in [
            ("aa", 0, ""),
            ("mmm", 1, "m"),
            ("mmm", 2, "mm"),
            ("mmm", 3, "mmm"),
            ("mmm", 4, "mmm"),
        ] {
            assert_eq!(trim_to_width(input, budget), expected);
        }

        // Every valid scalar value below 10000, in ascending order.
        let haystack: String = (0..10000).filter_map(char::from_u32).collect();
        for budget in 0..1000 {
            let trimmed = trim_to_width(&haystack, budget);
            let measured = width_str(trimmed);
            assert!(measured <= budget);
            assert!(measured + 15 >= budget)
        }
    }
}