1use crate::atomic::{PyAtomic, Radium};
3use crate::format::CharLen;
4use crate::wtf8::{CodePoint, Wtf8, Wtf8Buf};
5use ascii::{AsciiChar, AsciiStr, AsciiString};
6use core::fmt;
7use core::ops::{Bound, RangeBounds};
8use core::sync::atomic::Ordering::Relaxed;
9
/// Wide-character type: an alias for `libc::wchar_t` on every target except `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
#[allow(non_camel_case_types)]
pub type wchar_t = libc::wchar_t;
/// Wide-character type on `wasm32`, where the alias is defined directly as `u32`.
#[cfg(target_arch = "wasm32")]
#[allow(non_camel_case_types)]
pub type wchar_t = u32;
16
/// Narrowest encoding class that can represent a string's contents.
///
/// The variants are declared from narrowest to widest, so the derived ordering
/// satisfies `Ascii < Utf8 < Wtf8`.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub enum StrKind {
    /// Every code point is ASCII.
    Ascii,
    /// Every code point is a valid `char`.
    Utf8,
    /// Arbitrary code points are accepted (see [`StrKind::can_encode`]).
    Wtf8,
}

/// `a | b` yields the kind able to hold the contents of both operands.
impl core::ops::BitOr for StrKind {
    type Output = Self;

    fn bitor(self, other: Self) -> Self {
        // The wider kind always wins, and the derived `Ord` follows the
        // narrow-to-wide declaration order, so the union is simply the maximum.
        Ord::max(self, other)
    }
}
37
38impl StrKind {
39 pub const fn is_ascii(&self) -> bool {
40 matches!(self, Self::Ascii)
41 }
42
43 pub const fn is_utf8(&self) -> bool {
44 matches!(self, Self::Ascii | Self::Utf8)
45 }
46
47 #[inline(always)]
48 pub fn can_encode(&self, code: CodePoint) -> bool {
49 match self {
50 Self::Ascii => code.is_ascii(),
51 Self::Utf8 => code.to_char().is_some(),
52 Self::Wtf8 => true,
53 }
54 }
55}
56
/// Types whose contents can be classified as a [`StrKind`].
pub trait DeduceStrKind {
    /// Returns the [`StrKind`] describing `self`.
    fn str_kind(&self) -> StrKind;
}
60
61impl DeduceStrKind for str {
62 fn str_kind(&self) -> StrKind {
63 if self.is_ascii() {
64 StrKind::Ascii
65 } else {
66 StrKind::Utf8
67 }
68 }
69}
70
71impl DeduceStrKind for Wtf8 {
72 fn str_kind(&self) -> StrKind {
73 if self.is_ascii() {
74 StrKind::Ascii
75 } else if self.is_utf8() {
76 StrKind::Utf8
77 } else {
78 StrKind::Wtf8
79 }
80 }
81}
82
83impl DeduceStrKind for String {
84 fn str_kind(&self) -> StrKind {
85 (**self).str_kind()
86 }
87}
88
89impl DeduceStrKind for Wtf8Buf {
90 fn str_kind(&self) -> StrKind {
91 (**self).str_kind()
92 }
93}
94
95impl<T: DeduceStrKind + ?Sized> DeduceStrKind for &T {
96 fn str_kind(&self) -> StrKind {
97 (**self).str_kind()
98 }
99}
100
101impl<T: DeduceStrKind + ?Sized> DeduceStrKind for Box<T> {
102 fn str_kind(&self) -> StrKind {
103 (**self).str_kind()
104 }
105}
106
/// Borrowed view of string data, typed according to its [`StrKind`].
#[derive(Debug)]
pub enum PyKindStr<'a> {
    /// Data whose kind is [`StrKind::Ascii`].
    Ascii(&'a AsciiStr),
    /// Data whose kind is [`StrKind::Utf8`].
    Utf8(&'a str),
    /// Data whose kind is [`StrKind::Wtf8`].
    Wtf8(&'a Wtf8),
}
113
/// Owned string storage together with its encoding class and a cached character count.
#[derive(Debug, Clone)]
pub struct StrData {
    // The stored bytes.
    data: Box<Wtf8>,
    // Encoding class of `data`; the unchecked conversions in `as_str`/`as_ascii` trust it.
    kind: StrKind,
    // Cached number of code points; holds `usize::MAX` until `char_len` computes it.
    len: StrLen,
}
120
/// Atomically updatable character count; the value `usize::MAX` means "not computed yet".
struct StrLen(PyAtomic<usize>);
122
/// Wraps a raw count in a [`StrLen`] (passing `usize::MAX` produces the "uncomputed" marker).
impl From<usize> for StrLen {
    #[inline(always)]
    fn from(value: usize) -> Self {
        Self(Radium::new(value))
    }
}
129
130impl fmt::Debug for StrLen {
131 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
132 let len = self.0.load(Relaxed);
133 if len == usize::MAX {
134 f.write_str("<uncomputed>")
135 } else {
136 len.fmt(f)
137 }
138 }
139}
140
141impl StrLen {
142 #[inline(always)]
143 fn zero() -> Self {
144 0usize.into()
145 }
146
147 #[inline(always)]
148 fn uncomputed() -> Self {
149 usize::MAX.into()
150 }
151}
152
153impl Clone for StrLen {
154 fn clone(&self) -> Self {
155 Self(self.0.load(Relaxed).into())
156 }
157}
158
159impl Default for StrData {
160 fn default() -> Self {
161 Self {
162 data: <Box<Wtf8>>::default(),
163 kind: StrKind::Ascii,
164 len: StrLen::zero(),
165 }
166 }
167}
168
169impl From<Box<Wtf8>> for StrData {
170 fn from(value: Box<Wtf8>) -> Self {
171 let kind = value.str_kind();
174 unsafe { Self::new_str_unchecked(value, kind) }
175 }
176}
177
178impl From<Box<str>> for StrData {
179 #[inline]
180 fn from(value: Box<str>) -> Self {
181 let kind = value.str_kind();
184 unsafe { Self::new_str_unchecked(value.into(), kind) }
185 }
186}
187
188impl From<Box<AsciiStr>> for StrData {
189 #[inline]
190 fn from(value: Box<AsciiStr>) -> Self {
191 Self {
192 len: value.len().into(),
193 data: value.into(),
194 kind: StrKind::Ascii,
195 }
196 }
197}
198
199impl From<AsciiChar> for StrData {
200 fn from(ch: AsciiChar) -> Self {
201 AsciiString::from(ch).into_boxed_ascii_str().into()
202 }
203}
204
205impl From<char> for StrData {
206 fn from(ch: char) -> Self {
207 if let Ok(ch) = ascii::AsciiChar::from_ascii(ch) {
208 ch.into()
209 } else {
210 Self {
211 data: ch.to_string().into(),
212 kind: StrKind::Utf8,
213 len: 1.into(),
214 }
215 }
216 }
217}
218
219impl From<CodePoint> for StrData {
220 fn from(ch: CodePoint) -> Self {
221 if let Some(ch) = ch.to_char() {
222 ch.into()
223 } else {
224 Self {
225 data: Wtf8Buf::from(ch).into(),
226 kind: StrKind::Wtf8,
227 len: 1.into(),
228 }
229 }
230 }
231}
232
233impl StrData {
234 pub unsafe fn new_str_unchecked(data: Box<Wtf8>, kind: StrKind) -> Self {
238 let len = match kind {
239 StrKind::Ascii => data.len().into(),
240 _ => StrLen::uncomputed(),
241 };
242 Self { data, kind, len }
243 }
244
245 pub unsafe fn new_with_char_len(data: Box<Wtf8>, kind: StrKind, char_len: usize) -> Self {
249 Self {
250 data,
251 kind,
252 len: char_len.into(),
253 }
254 }
255
256 #[inline]
257 pub const fn as_wtf8(&self) -> &Wtf8 {
258 &self.data
259 }
260
261 #[inline]
263 pub fn as_str(&self) -> Option<&str> {
264 self.kind
265 .is_utf8()
266 .then(|| unsafe { core::str::from_utf8_unchecked(self.data.as_bytes()) })
267 }
268
269 pub fn as_ascii(&self) -> Option<&AsciiStr> {
270 self.kind
271 .is_ascii()
272 .then(|| unsafe { AsciiStr::from_ascii_unchecked(self.data.as_bytes()) })
273 }
274
275 pub const fn kind(&self) -> StrKind {
276 self.kind
277 }
278
279 #[inline]
280 pub fn as_str_kind(&self) -> PyKindStr<'_> {
281 match self.kind {
282 StrKind::Ascii => {
283 PyKindStr::Ascii(unsafe { AsciiStr::from_ascii_unchecked(self.data.as_bytes()) })
284 }
285 StrKind::Utf8 => {
286 PyKindStr::Utf8(unsafe { core::str::from_utf8_unchecked(self.data.as_bytes()) })
287 }
288 StrKind::Wtf8 => PyKindStr::Wtf8(&self.data),
289 }
290 }
291
292 #[inline]
293 pub fn len(&self) -> usize {
294 self.data.len()
295 }
296
297 pub fn is_empty(&self) -> bool {
298 self.data.is_empty()
299 }
300
301 #[inline]
302 pub fn char_len(&self) -> usize {
303 match self.len.0.load(Relaxed) {
304 usize::MAX => self._compute_char_len(),
305 len => len,
306 }
307 }
308
309 #[cold]
310 fn _compute_char_len(&self) -> usize {
311 let len = if let Some(s) = self.as_str() {
312 s.chars().count()
314 } else {
315 self.data.code_points().count()
316 };
317 self.len.0.store(len, Relaxed);
319 len
320 }
321
322 pub fn nth_char(&self, index: usize) -> CodePoint {
323 match self.as_str_kind() {
324 PyKindStr::Ascii(s) => s[index].into(),
325 PyKindStr::Utf8(s) => s.chars().nth(index).unwrap().into(),
326 PyKindStr::Wtf8(w) => w.code_points().nth(index).unwrap(),
327 }
328 }
329}
330
/// Formats a `StrData` by forwarding to the `fmt` method of the stored data.
impl core::fmt::Display for StrData {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        self.data.fmt(f)
    }
}
336
/// Exposes the cached character count through the `CharLen` trait.
impl CharLen for StrData {
    fn char_len(&self) -> usize {
        // Method-call syntax prefers the inherent `StrData::char_len` over this trait
        // method, so this forwards to the inherent method rather than recursing.
        self.char_len()
    }
}
342
/// Returns the sub-slice of `s` covering the characters (not bytes) selected by `range`.
///
/// Returns `None` when the range does not fit inside `s`: the start or end lies past the
/// last character, the end precedes the start, or a bound cannot be represented as a
/// `usize` character index (for example `..=usize::MAX`).
pub fn try_get_chars(s: &str, range: impl RangeBounds<usize>) -> Option<&str> {
    let start = match range.start_bound() {
        Bound::Included(&i) => i,
        Bound::Excluded(&i) => i.checked_add(1)?,
        Bound::Unbounded => 0,
    };
    let mut chars = s.chars();
    if let Some(last_skipped) = start.checked_sub(1) {
        // Consumes exactly `start` characters, failing if `s` is shorter than that.
        chars.nth(last_skipped)?;
    }
    let s = chars.as_str();
    // Checked arithmetic: an inverted range such as `5..3` must yield `None`
    // rather than underflow.
    let range_len = match range.end_bound() {
        Bound::Included(&i) => i.checked_add(1)?.checked_sub(start)?,
        Bound::Excluded(&i) => i.checked_sub(start)?,
        Bound::Unbounded => return Some(s),
    };
    char_range_end(s, range_len).map(|end| &s[..end])
}

/// Like [`try_get_chars`], but panics instead of returning `None`.
///
/// # Panics
///
/// Panics when `range` does not fit inside `s`.
pub fn get_chars(s: &str, range: impl RangeBounds<usize>) -> &str {
    try_get_chars(s, range).unwrap()
}

/// Returns the byte offset just past the first `n_chars` characters of `s`.
///
/// Yields `Some(0)` for `n_chars == 0` and `None` when `s` has fewer than `n_chars`
/// characters.
#[inline]
pub fn char_range_end(s: &str, n_chars: usize) -> Option<usize> {
    let Some(last_char_index) = n_chars.checked_sub(1) else {
        return Some(0);
    };
    let (index, c) = s.char_indices().nth(last_char_index)?;
    Some(index + c.len_utf8())
}
377
378pub fn try_get_codepoints(w: &Wtf8, range: impl RangeBounds<usize>) -> Option<&Wtf8> {
379 let mut chars = w.code_points();
380 let start = match range.start_bound() {
381 Bound::Included(&i) => i,
382 Bound::Excluded(&i) => i + 1,
383 Bound::Unbounded => 0,
384 };
385 for _ in 0..start {
386 chars.next()?;
387 }
388 let s = chars.as_wtf8();
389 let range_len = match range.end_bound() {
390 Bound::Included(&i) => i + 1 - start,
391 Bound::Excluded(&i) => i - start,
392 Bound::Unbounded => return Some(s),
393 };
394 codepoint_range_end(s, range_len).map(|end| &s[..end])
395}
396
/// Like [`try_get_codepoints`], but panics instead of returning `None`.
///
/// # Panics
///
/// Panics when `try_get_codepoints` returns `None` for `range`.
pub fn get_codepoints(w: &Wtf8, range: impl RangeBounds<usize>) -> &Wtf8 {
    try_get_codepoints(w, range).unwrap()
}
400
401#[inline]
402pub fn codepoint_range_end(s: &Wtf8, n_chars: usize) -> Option<usize> {
403 let i = match n_chars.checked_sub(1) {
404 Some(last_char_index) => {
405 let (index, c) = s.code_point_indices().nth(last_char_index)?;
406 index + c.len_wtf8()
407 }
408 None => 0,
409 };
410 Some(i)
411}
412
/// Left-pads `bytes` with ASCII `'0'` up to `width` bytes.
///
/// A leading `+` or `-` stays in front of the inserted zeros. When `bytes` is already at
/// least `width` long, an unchanged copy is returned.
pub fn zfill(bytes: &[u8], width: usize) -> Vec<u8> {
    if width <= bytes.len() {
        return bytes.to_vec();
    }
    // Split off an optional sign so the padding goes between it and the digits.
    let (sign, digits) = match bytes.split_first() {
        Some((b'+' | b'-', rest)) => (&bytes[..1], rest),
        _ => (&bytes[..0], bytes),
    };
    // The result is exactly `width` bytes long, so allocate once.
    let mut filled = Vec::with_capacity(width);
    filled.extend_from_slice(sign);
    filled.extend(core::iter::repeat_n(b'0', width - bytes.len()));
    filled.extend_from_slice(digits);
    filled
}
430
431pub fn to_ascii(value: &Wtf8) -> AsciiString {
434 let mut ascii = Vec::new();
435 for cp in value.code_points() {
436 if cp.is_ascii() {
437 ascii.push(cp.to_u32() as u8);
438 } else {
439 let c = cp.to_u32();
440 let hex = if c < 0x100 {
441 format!("\\x{c:02x}")
442 } else if c < 0x10000 {
443 format!("\\u{c:04x}")
444 } else {
445 format!("\\U{c:08x}")
446 };
447 ascii.append(&mut hex.into_bytes());
448 }
449 }
450 unsafe { AsciiString::from_ascii_unchecked(ascii) }
451}
452
/// Wrapper whose `Display` output is an escape sequence for the wrapped code point:
/// `\xNN`, `\uNNNN` or `\UNNNNNNNN`, chosen by the code point's magnitude.
#[derive(Clone, Copy)]
pub struct UnicodeEscapeCodepoint(pub CodePoint);
455
456impl fmt::Display for UnicodeEscapeCodepoint {
457 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
458 let c = self.0.to_u32();
459 if c >= 0x10000 {
460 write!(f, "\\U{c:08x}")
461 } else if c >= 0x100 {
462 write!(f, "\\u{c:04x}")
463 } else {
464 write!(f, "\\x{c:02x}")
465 }
466 }
467}
468
/// Bounded, case-aware edit distance over byte strings.
pub mod levenshtein {
    /// Cost of inserting, deleting, or substituting an unrelated byte.
    pub const MOVE_COST: usize = 2;
    /// Cost of substituting a byte for the same ASCII letter in the other case.
    const CASE_COST: usize = 1;
    /// Longest input (after trimming shared affixes) that is still compared.
    const MAX_STRING_SIZE: usize = 40;

    /// Cost of replacing byte `a` with byte `b`.
    const fn substitution_cost(a: u8, b: u8) -> usize {
        if (a & 31) != (b & 31) {
            // Two bytes that differ only by ASCII case share their low five bits,
            // so a mismatch there rules out the cheap cases.
            MOVE_COST
        } else if a == b {
            0
        } else if a.to_ascii_lowercase() == b.to_ascii_lowercase() {
            CASE_COST
        } else {
            MOVE_COST
        }
    }

    /// Computes the edit distance between `a` and `b`, giving up once it must exceed
    /// `max_cost`.
    ///
    /// Returns `max_cost + 1` when the search is abandoned: either the trimmed inputs
    /// exceed `MAX_STRING_SIZE`, or every candidate in a row is already above `max_cost`.
    pub fn levenshtein_distance(a: &[u8], b: &[u8], max_cost: usize) -> usize {
        if a == b {
            return 0;
        }

        // Shared prefixes and suffixes never contribute to the distance; drop them.
        let prefix = a.iter().zip(b).take_while(|(x, y)| x == y).count();
        let (a, b) = (&a[prefix..], &b[prefix..]);
        let suffix = a
            .iter()
            .rev()
            .zip(b.iter().rev())
            .take_while(|(x, y)| x == y)
            .count();
        let (a, b) = (&a[..a.len() - suffix], &b[..b.len() - suffix]);

        if a.is_empty() || b.is_empty() {
            return (a.len() + b.len()) * MOVE_COST;
        }
        if a.len() > MAX_STRING_SIZE || b.len() > MAX_STRING_SIZE {
            return max_cost + 1;
        }

        // Keep the shorter string along the row so the fixed buffer suffices.
        let (short, long) = if b.len() < a.len() { (b, a) } else { (a, b) };

        // The length difference alone is a lower bound on the distance.
        if (long.len() - short.len()) * MOVE_COST > max_cost {
            return max_cost + 1;
        }

        // Single-row dynamic programme: `row[i]` is the cost of turning the processed
        // part of `long` into `short[..=i]`.
        let mut row = [0usize; MAX_STRING_SIZE];
        for (i, cell) in row[..short.len()].iter_mut().enumerate() {
            *cell = (i + 1) * MOVE_COST;
        }

        let mut result = 0usize;
        for (long_index, &long_byte) in long.iter().enumerate() {
            result = long_index * MOVE_COST;
            // Value of the cell diagonally up-left of the one being computed.
            let mut diagonal = result;
            let mut row_minimum = usize::MAX;
            for (cell, &short_byte) in row.iter_mut().zip(short) {
                let substitute = diagonal + substitution_cost(long_byte, short_byte);
                diagonal = *cell;
                let insert_delete = result.min(diagonal) + MOVE_COST;
                result = insert_delete.min(substitute);

                *cell = result;
                row_minimum = row_minimum.min(result);
            }
            if row_minimum > max_cost {
                // Every entry in this row is already too expensive.
                return max_cost + 1;
            }
        }
        result
    }
}
558
/// Replaces every tab in `input` with spaces up to the next multiple of `tab_size` columns.
///
/// The column counter restarts after each `'\n'` or `'\r'`. A `tab_size` of zero removes
/// tabs without inserting anything.
pub fn expandtabs(input: &str, tab_size: usize) -> String {
    let mut expanded_str = String::with_capacity(input.len());
    let mut col_count = 0usize;
    for ch in input.chars() {
        match ch {
            '\t' => {
                // With no tab stops there is nothing to pad to; guarding here also
                // avoids the subtraction underflow a zero tab size would cause.
                if tab_size > 0 {
                    let num_spaces = tab_size - col_count % tab_size;
                    expanded_str.extend(core::iter::repeat_n(' ', num_spaces));
                    col_count += num_spaces;
                }
            }
            '\r' | '\n' => {
                expanded_str.push(ch);
                col_count = 0;
            }
            _ => {
                expanded_str.push(ch);
                col_count += 1;
            }
        }
    }
    expanded_str
}
589
/// Converts a constant string expression into an `AsciiStr` reference.
///
/// The argument is evaluated inside a `const` block, where an assertion rejects any
/// non-ASCII string. The bytes are then reinterpreted without a second check.
#[macro_export]
macro_rules! ascii {
    ($x:expr $(,)?) => {{
        // Evaluated in a const block so the ASCII assertion runs in const context.
        let s = const {
            let s: &str = $x;
            assert!(s.is_ascii(), "ascii!() argument is not an ascii string");
            s
        };
        // SAFETY: the const block above asserted that `s` is ASCII.
        unsafe { $crate::vendored::ascii::AsciiStr::from_ascii_unchecked(s.as_bytes()) }
    }};
}
608pub use ascii;
609
/// The zero digit of every run of decimal digits recognised by [`char_to_decimal`],
/// in ascending code point order.
///
/// Each entry starts a run of ten consecutive code points standing for the digits 0
/// through 9. Escapes are used instead of literal characters so that the entries
/// survive text normalisation, which can silently turn the fullwidth digits into
/// ASCII ones.
const UNICODE_DECIMAL_ZEROS: &[char] = &[
    '\u{0030}',
    '\u{0660}',
    '\u{06F0}',
    '\u{07C0}',
    '\u{0966}',
    '\u{09E6}',
    '\u{0A66}',
    '\u{0AE6}',
    '\u{0B66}',
    '\u{0BE6}',
    '\u{0C66}',
    '\u{0CE6}',
    '\u{0D66}',
    '\u{0DE6}',
    '\u{0E50}',
    '\u{0ED0}',
    '\u{0F20}',
    '\u{1040}',
    '\u{1090}',
    '\u{17E0}',
    '\u{1810}',
    '\u{1946}',
    '\u{19D0}',
    '\u{1A80}',
    '\u{1A90}',
    '\u{1B50}',
    '\u{1BB0}',
    '\u{1C40}',
    '\u{1C50}',
    '\u{A620}',
    '\u{A8D0}',
    '\u{A900}',
    '\u{A9D0}',
    '\u{A9F0}',
    '\u{AA50}',
    '\u{ABF0}',
    '\u{FF10}', // fullwidth digits
    '\u{104A0}',
    '\u{11066}',
    '\u{110F0}',
    '\u{11136}',
    '\u{111D0}',
    '\u{112F0}',
    '\u{11450}',
    '\u{114D0}',
    '\u{11650}',
    '\u{116C0}',
    '\u{11730}',
    '\u{118E0}',
    '\u{11C50}',
    '\u{11D50}',
    '\u{16A60}',
    '\u{16B50}',
    '\u{1D7CE}',
    '\u{1D7D8}',
    '\u{1D7E2}',
    '\u{1D7EC}',
    '\u{1D7F6}',
    '\u{1E950}',
];

/// Returns the numeric value (0–9) of `ch` when it is one of the recognised Unicode
/// decimal digits, and `None` otherwise.
pub fn char_to_decimal(ch: char) -> Option<u8> {
    // Locate the last run whose zero digit is <= `ch`; `ch` is a digit exactly when
    // it lies within ten code points of that zero.
    let run = UNICODE_DECIMAL_ZEROS
        .partition_point(|&zero| zero <= ch)
        .checked_sub(1)?;
    let offset = u32::from(ch) - u32::from(UNICODE_DECIMAL_ZEROS[run]);
    u8::try_from(offset).ok().filter(|&digit| digit < 10)
}
652
#[cfg(test)]
mod tests {
    use super::*;

    /// `get_chars` must index by character, not by byte: the same `3..7` range is
    /// checked against one-byte, three-byte and four-byte characters.
    #[test]
    fn test_get_chars() {
        // ASCII: character indices and byte indices coincide.
        let s = "0123456789";
        assert_eq!(get_chars(s, 3..7), "3456");
        assert_eq!(get_chars(s, 3..7), &s[3..7]);

        // Hangul: three bytes per character in UTF-8.
        let s = "0유니코드 문자열9";
        assert_eq!(get_chars(s, 3..7), "코드 문");

        // Emoji: four bytes per character in UTF-8.
        let s = "0😀😃😄😁😆😅😂🤣9";
        assert_eq!(get_chars(s, 3..7), "😄😁😆😅");
    }
}