1use std::cmp::Ordering;
7use std::ffi::CStr;
8use std::mem;
9use std::mem::MaybeUninit;
10use std::ops::Range;
11use std::ptr::{null, null_mut};
12
13use crate::arena::{Arena, ArenaString, scratch_arena};
14use crate::buffer::TextBuffer;
15use crate::unicode::Utf8Chars;
16use crate::{apperr, arena_format, sys};
17
/// A single text encoding entry as produced by [`get_available_encodings`].
#[derive(Clone, Copy)]
pub struct Encoding {
    /// Display name. For ICU converters this is the MIME standard name when
    /// ICU reports one, otherwise it is identical to `canonical`.
    pub label: &'static str,
    /// The converter name as reported by ICU (or the built-in "UTF-8" / "UTF-8 BOM").
    pub canonical: &'static str,
}
23
/// The encoding lists returned by [`get_available_encodings`].
pub struct Encodings {
    /// The built-in UTF-8 entries followed by every ICU converter that has a MIME name.
    /// This is always a prefix of `all`.
    pub preferred: &'static [Encoding],
    /// `preferred`, followed by the ICU converters that have no MIME name.
    pub all: &'static [Encoding],
}
28
29static mut ENCODINGS: Encodings = Encodings { preferred: &[], all: &[] };
30
31pub fn get_available_encodings() -> &'static Encodings {
33 #[allow(static_mut_refs)]
35 unsafe {
36 if ENCODINGS.all.is_empty() {
37 let scratch = scratch_arena(None);
38 let mut preferred = Vec::new_in(&*scratch);
39 let mut alternative = Vec::new_in(&*scratch);
40
41 preferred.push(Encoding { label: "UTF-8", canonical: "UTF-8" });
43 preferred.push(Encoding { label: "UTF-8 BOM", canonical: "UTF-8 BOM" });
44
45 if let Ok(f) = init_if_needed() {
46 let mut n = 0;
47 loop {
48 let name = (f.ucnv_getAvailableName)(n);
49 if name.is_null() {
50 break;
51 }
52
53 n += 1;
54
55 let name = CStr::from_ptr(name).to_str().unwrap_unchecked();
56 if name.is_empty() || name == "UTF-8" {
60 continue;
61 }
62
63 let mut status = icu_ffi::U_ZERO_ERROR;
64 let mime = (f.ucnv_getStandardName)(
65 name.as_ptr(),
66 c"MIME".as_ptr() as *const _,
67 &mut status,
68 );
69 if !mime.is_null() && status.is_success() {
70 let mime = CStr::from_ptr(mime).to_str().unwrap_unchecked();
71 preferred.push(Encoding { label: mime, canonical: name });
72 } else {
73 alternative.push(Encoding { label: name, canonical: name });
74 }
75 }
76 }
77
78 let preferred_len = preferred.len();
79
80 let mut all = Vec::with_capacity(preferred.len() + alternative.len());
82 all.extend(preferred);
83 all.extend(alternative);
84
85 let all = all.leak();
86 ENCODINGS.preferred = &all[..preferred_len];
87 ENCODINGS.all = &all[..];
88 }
89
90 &ENCODINGS
91 }
92}
93
94pub fn apperr_format(f: &mut std::fmt::Formatter<'_>, code: u32) -> std::fmt::Result {
96 fn format(code: u32) -> &'static str {
97 let Ok(f) = init_if_needed() else {
98 return "";
99 };
100
101 let status = icu_ffi::UErrorCode::new(code);
102 let ptr = unsafe { (f.u_errorName)(status) };
103 if ptr.is_null() {
104 return "";
105 }
106
107 let str = unsafe { CStr::from_ptr(ptr) };
108 str.to_str().unwrap_or("")
109 }
110
111 let msg = format(code);
112 if !msg.is_empty() {
113 write!(f, "ICU Error: {msg}")
114 } else {
115 write!(f, "ICU Error: {code:#08x}")
116 }
117}
118
/// Converts text between two encodings via ICU's `ucnv_convertEx`.
///
/// Both converter handles are closed when the value is dropped.
pub struct Converter<'pivot> {
    /// ICU converter opened for the source encoding.
    source: *mut icu_ffi::UConverter,
    /// ICU converter opened for the target encoding.
    target: *mut icu_ffi::UConverter,
    /// Caller-provided UTF-16 scratch space handed to `ucnv_convertEx` as its pivot buffer.
    pivot_buffer: &'pivot mut [MaybeUninit<u16>],
    /// Pivot read position; passed by pointer to `ucnv_convertEx` and kept across calls.
    pivot_source: *mut u16,
    /// Pivot write position; passed by pointer to `ucnv_convertEx` and kept across calls.
    pivot_target: *mut u16,
    /// Passed as the `reset` argument; true only until the first `convert` call has run.
    reset: bool,
}
128
129impl Drop for Converter<'_> {
130 fn drop(&mut self) {
131 let f = assume_loaded();
132 unsafe { (f.ucnv_close)(self.source) };
133 unsafe { (f.ucnv_close)(self.target) };
134 }
135}
136
137impl<'pivot> Converter<'pivot> {
138 pub fn new(
147 pivot_buffer: &'pivot mut [MaybeUninit<u16>],
148 source_encoding: &str,
149 target_encoding: &str,
150 ) -> apperr::Result<Self> {
151 let f = init_if_needed()?;
152
153 let arena = scratch_arena(None);
154 let source_encoding = Self::append_nul(&arena, source_encoding);
155 let target_encoding = Self::append_nul(&arena, target_encoding);
156
157 let mut status = icu_ffi::U_ZERO_ERROR;
158 let source = unsafe { (f.ucnv_open)(source_encoding.as_ptr(), &mut status) };
159 let target = unsafe { (f.ucnv_open)(target_encoding.as_ptr(), &mut status) };
160 if status.is_failure() {
161 if !source.is_null() {
162 unsafe { (f.ucnv_close)(source) };
163 }
164 if !target.is_null() {
165 unsafe { (f.ucnv_close)(target) };
166 }
167 return Err(status.as_error());
168 }
169
170 let pivot_source = pivot_buffer.as_mut_ptr() as *mut u16;
171 let pivot_target = unsafe { pivot_source.add(pivot_buffer.len()) };
172
173 Ok(Self { source, target, pivot_buffer, pivot_source, pivot_target, reset: true })
174 }
175
176 fn append_nul<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> {
177 arena_format!(arena, "{}\0", input)
178 }
179
180 pub fn convert(
195 &mut self,
196 input: &[u8],
197 output: &mut [MaybeUninit<u8>],
198 ) -> apperr::Result<(usize, usize)> {
199 let f = assume_loaded();
200
201 let input_beg = input.as_ptr();
202 let input_end = unsafe { input_beg.add(input.len()) };
203 let mut input_ptr = input_beg;
204
205 let output_beg = output.as_mut_ptr() as *mut u8;
206 let output_end = unsafe { output_beg.add(output.len()) };
207 let mut output_ptr = output_beg;
208
209 let pivot_beg = self.pivot_buffer.as_mut_ptr() as *mut u16;
210 let pivot_end = unsafe { pivot_beg.add(self.pivot_buffer.len()) };
211
212 let flush = input.is_empty();
213 let mut status = icu_ffi::U_ZERO_ERROR;
214
215 unsafe {
216 (f.ucnv_convertEx)(
217 self.target,
218 self.source,
219 &mut output_ptr,
220 output_end,
221 &mut input_ptr,
222 input_end,
223 pivot_beg,
224 &mut self.pivot_source,
225 &mut self.pivot_target,
226 pivot_end,
227 self.reset,
228 flush,
229 &mut status,
230 );
231 }
232
233 self.reset = false;
234 if status.is_failure() && status != icu_ffi::U_BUFFER_OVERFLOW_ERROR {
235 return Err(status.as_error());
236 }
237
238 let input_advance = unsafe { input_ptr.offset_from(input_beg) as usize };
239 let output_advance = unsafe { output_ptr.offset_from(output_beg) as usize };
240 Ok((input_advance, output_advance))
241 }
242}
243
/// Number of `u16` slots in each array of a [`Cache`].
const CACHE_SIZE: usize = 64;

/// One chunk of text buffer contents, transcoded from UTF-8 to UTF-16,
/// together with offset tables for mapping between the two.
#[repr(C)]
struct Cache {
    /// The UTF-16 code units of the chunk. Only the first `utf16_len` entries are valid.
    utf16: [u16; CACHE_SIZE],
    /// For each UTF-16 index, the UTF-8 offset relative to `utf8_range.start`.
    utf16_to_utf8_offsets: [u16; CACHE_SIZE],
    /// For each UTF-8 offset relative to `utf8_range.start`, the UTF-16 index.
    utf8_to_utf16_offsets: [u16; CACHE_SIZE],

    /// Number of valid code units in `utf16`.
    utf16_len: usize,
    /// Length of the leading ASCII-only part of the chunk.
    /// Reported to ICU as `UText::native_indexing_limit`.
    native_indexing_limit: usize,

    /// The absolute UTF-8 byte range in the text buffer that this chunk covers.
    utf8_range: Range<usize>,
}
271
/// A pair of [`Cache`]s stored in the extra space of a `UText` (see `utext_setup` calls).
#[repr(C)]
struct DoubleCache {
    /// The two chunk caches.
    cache: [Cache; 2],
    /// Index (false = 0, true = 1) of the cache that was used most recently.
    mru: bool,
}
278
/// Owns an ICU `UText` that reads from a `TextBuffer`.
///
/// The `UText` is allocated by `utext_setup` in [`Text::new`] and closed on drop.
pub struct Text(&'static mut icu_ffi::UText);
289
290impl Drop for Text {
291 fn drop(&mut self) {
292 let f = assume_loaded();
293 unsafe { (f.utext_close)(self.0) };
294 }
295}
296
297impl Text {
298 pub unsafe fn new(tb: &TextBuffer) -> apperr::Result<Self> {
305 let f = init_if_needed()?;
306
307 let mut status = icu_ffi::U_ZERO_ERROR;
308 let ptr =
309 unsafe { (f.utext_setup)(null_mut(), size_of::<DoubleCache>() as i32, &mut status) };
310 if status.is_failure() {
311 return Err(status.as_error());
312 }
313
314 const FUNCS: icu_ffi::UTextFuncs = icu_ffi::UTextFuncs {
315 table_size: size_of::<icu_ffi::UTextFuncs>() as i32,
316 reserved1: 0,
317 reserved2: 0,
318 reserved3: 0,
319 clone: Some(utext_clone),
320 native_length: Some(utext_native_length),
321 access: Some(utext_access),
322 extract: None,
323 replace: None,
324 copy: None,
325 map_offset_to_native: Some(utext_map_offset_to_native),
326 map_native_index_to_utf16: Some(utext_map_native_index_to_utf16),
327 close: None,
328 spare1: None,
329 spare2: None,
330 spare3: None,
331 };
332
333 let ut = unsafe { &mut *ptr };
334 ut.p_funcs = &FUNCS;
335 ut.context = tb as *const TextBuffer as *mut _;
336 ut.a = -1;
337
338 Ok(Self(ut))
339 }
340}
341
342fn text_buffer_from_utext<'a>(ut: &icu_ffi::UText) -> &'a TextBuffer {
343 unsafe { &*(ut.context as *const TextBuffer) }
344}
345
346fn double_cache_from_utext<'a>(ut: &icu_ffi::UText) -> &'a mut DoubleCache {
347 unsafe { &mut *(ut.p_extra as *mut DoubleCache) }
348}
349
350extern "C" fn utext_clone(
351 dest: *mut icu_ffi::UText,
352 src: &icu_ffi::UText,
353 deep: bool,
354 status: &mut icu_ffi::UErrorCode,
355) -> *mut icu_ffi::UText {
356 if status.is_failure() {
357 return null_mut();
358 }
359
360 if deep {
361 *status = icu_ffi::U_UNSUPPORTED_ERROR;
362 return null_mut();
363 }
364
365 let f = assume_loaded();
366 let ut_ptr = unsafe { (f.utext_setup)(dest, size_of::<DoubleCache>() as i32, status) };
367 if status.is_failure() {
368 return null_mut();
369 }
370
371 unsafe {
375 let ut = &mut *ut_ptr;
376 ut.p_funcs = src.p_funcs;
377 ut.context = src.context;
378 ut.a = -1;
379 }
380
381 ut_ptr
382}
383
384extern "C" fn utext_native_length(ut: &mut icu_ffi::UText) -> i64 {
385 let tb = text_buffer_from_utext(ut);
386 tb.text_length() as i64
387}
388
389extern "C" fn utext_access(ut: &mut icu_ffi::UText, native_index: i64, forward: bool) -> bool {
390 if let Some(cache) = utext_access_impl(ut, native_index, forward) {
391 let native_off = native_index as usize - cache.utf8_range.start;
392 ut.chunk_contents = cache.utf16.as_ptr();
393 ut.chunk_length = cache.utf16_len as i32;
394 ut.chunk_offset = cache.utf8_to_utf16_offsets[native_off] as i32;
395 ut.chunk_native_start = cache.utf8_range.start as i64;
396 ut.chunk_native_limit = cache.utf8_range.end as i64;
397 ut.native_indexing_limit = cache.native_indexing_limit as i32;
398 true
399 } else {
400 false
401 }
402}
403
/// Finds or builds the [`Cache`] that covers `native_index`.
///
/// With `forward == true` the chunk must contain the byte at `native_index`,
/// otherwise it must contain the byte right before it.
/// Returns `None` if that byte lies outside of the text buffer.
fn utext_access_impl<'a>(
    ut: &mut icu_ffi::UText,
    native_index: i64,
    forward: bool,
) -> Option<&'a mut Cache> {
    let tb = text_buffer_from_utext(ut);
    // The byte offset that the returned chunk has to contain.
    let mut index_contained = native_index;

    if !forward {
        index_contained -= 1;
    }
    if index_contained < 0 || index_contained as usize >= tb.text_length() {
        return None;
    }

    let index_contained = index_contained as usize;
    let native_index = native_index as usize;
    let double_cache = double_cache_from_utext(ut);
    // `ut.a` stores the buffer generation the caches were built for.
    let dirty = ut.a != tb.generation() as i64;

    if dirty {
        // The buffer changed since the caches were filled: empty both of them
        // so that the lookup in the else branch cannot return stale data later on.
        double_cache.cache[0].utf16_len = 0;
        double_cache.cache[1].utf16_len = 0;
        double_cache.cache[0].utf8_range = 0..0;
        double_cache.cache[1].utf8_range = 0..0;
        ut.a = tb.generation() as i64;
    } else {
        // Reuse an existing cache if it already covers the requested byte.
        for (i, cache) in double_cache.cache.iter_mut().enumerate() {
            if cache.utf8_range.contains(&index_contained) {
                double_cache.mru = i != 0;
                return Some(cache);
            }
        }
    }

    // Cache miss: overwrite the least recently used cache and mark it as most recently used.
    let double_cache = double_cache_from_utext(ut);
    double_cache.mru = !double_cache.mru;
    let cache = &mut double_cache.cache[double_cache.mru as usize];

    // Stop 4 slots short of the array size. This leaves headroom for the code units
    // of the last character and for the past-the-end table entries written below.
    const UTF16_LEN_LIMIT: usize = CACHE_SIZE - 4;
    let utf8_len_limit;
    let native_start;

    if forward {
        utf8_len_limit = (tb.text_length() - native_index).min(UTF16_LEN_LIMIT);
        native_start = native_index;
    } else {
        utf8_len_limit = native_index.min(UTF16_LEN_LIMIT);

        // Stepping back by a fixed number of bytes may land inside a multi-byte sequence.
        // Skip forward over continuation bytes (0b10xx_xxxx) until a lead byte is found.
        let mut beg = native_index - utf8_len_limit;
        let chunk = tb.read_forward(beg);
        for &c in chunk {
            if c & 0b1100_0000 != 0b1000_0000 {
                break;
            }
            beg += 1;
        }

        native_start = beg;
    }

    // Transcode from UTF-8 to UTF-16, filling the offset tables along the way.
    // `ascii_len` counts the leading code units that were plain ASCII.
    let mut utf16_len = 0;
    let mut utf8_len = 0;
    let mut ascii_len = 0;
    'outer: loop {
        let initial_utf8_len = utf8_len;
        let chunk = tb.read_forward(native_start + utf8_len);
        if chunk.is_empty() {
            break;
        }

        let mut it = Utf8Chars::new(chunk, 0);

        // Fast path: as long as everything so far was ASCII,
        // the bytes can simply be widened from u8 to u16.
        if utf16_len == ascii_len {
            let haystack = &chunk[..chunk.len().min(utf8_len_limit - ascii_len)];

            // Length of the ASCII run at the start of `haystack`.
            let len = haystack.iter().position(|&c| c >= 0x80).unwrap_or(haystack.len());

            for &c in &chunk[..len] {
                // For ASCII the UTF-8 and UTF-16 offsets are identical.
                unsafe {
                    *cache.utf16.get_unchecked_mut(ascii_len) = c as u16;
                    *cache.utf16_to_utf8_offsets.get_unchecked_mut(ascii_len) = ascii_len as u16;
                    *cache.utf8_to_utf16_offsets.get_unchecked_mut(ascii_len) = ascii_len as u16;
                }
                ascii_len += 1;
            }

            utf16_len += len;
            utf8_len += len;
            it.seek(len);
            if ascii_len >= UTF16_LEN_LIMIT {
                break;
            }
        }

        // Slow path: decode one character at a time.
        loop {
            let Some(c) = it.next() else {
                break;
            };

            unsafe {
                let utf8_len_beg = utf8_len;
                let utf8_len_end = initial_utf8_len + it.offset();

                // Every byte of this character maps to the same UTF-16 index.
                while utf8_len < utf8_len_end {
                    *cache.utf8_to_utf16_offsets.get_unchecked_mut(utf8_len) = utf16_len as u16;
                    utf8_len += 1;
                }

                if c <= '\u{FFFF}' {
                    *cache.utf16.get_unchecked_mut(utf16_len) = c as u16;
                    *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len) = utf8_len_beg as u16;
                    utf16_len += 1;
                } else {
                    // Encode as a surrogate pair. Both halves map to the start of the character.
                    let c = c as u32 - 0x10000;
                    let b = utf8_len_beg as u16;
                    *cache.utf16.get_unchecked_mut(utf16_len) = (c >> 10) as u16 | 0xD800;
                    *cache.utf16.get_unchecked_mut(utf16_len + 1) = (c & 0x3FF) as u16 | 0xDC00;
                    *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len) = b;
                    *cache.utf16_to_utf8_offsets.get_unchecked_mut(utf16_len + 1) = b;
                    utf16_len += 2;
                }
            }

            if utf16_len >= UTF16_LEN_LIMIT || utf8_len >= utf8_len_limit {
                break 'outer;
            }
        }
    }

    // Fill in the past-the-end entries, so that the offset mapping callbacks
    // can be asked about the position right after the chunk.
    cache.utf16_to_utf8_offsets[utf16_len] = utf8_len as u16;
    cache.utf8_to_utf16_offsets[utf8_len] = utf16_len as u16;

    let native_limit = native_start + utf8_len;
    cache.utf16_len = utf16_len;
    cache.native_indexing_limit = ascii_len;
    cache.utf8_range = native_start..native_limit;
    Some(cache)
}
574
575extern "C" fn utext_map_offset_to_native(ut: &icu_ffi::UText) -> i64 {
576 debug_assert!((0..=ut.chunk_length).contains(&ut.chunk_offset));
577
578 let double_cache = double_cache_from_utext(ut);
579 let cache = &double_cache.cache[double_cache.mru as usize];
580 let off_rel = cache.utf16_to_utf8_offsets[ut.chunk_offset as usize];
581 let off_abs = cache.utf8_range.start + off_rel as usize;
582 off_abs as i64
583}
584
585extern "C" fn utext_map_native_index_to_utf16(ut: &icu_ffi::UText, native_index: i64) -> i32 {
586 debug_assert!((ut.chunk_native_start..=ut.chunk_native_limit).contains(&native_index));
587
588 let double_cache = double_cache_from_utext(ut);
589 let cache = &double_cache.cache[double_cache.mru as usize];
590 let off_rel = cache.utf8_to_utf16_offsets[(native_index - ut.chunk_native_start) as usize];
591 off_rel as i32
592}
593
/// Owns a compiled ICU regular expression (`URegularExpression`).
///
/// The handle is created by `uregex_open` in [`Regex::new`] and closed on drop.
/// Iterating over a `Regex` yields the byte ranges of successive matches.
pub struct Regex(&'static mut icu_ffi::URegularExpression);
600
601impl Drop for Regex {
602 fn drop(&mut self) {
603 let f = assume_loaded();
604 unsafe { (f.uregex_close)(self.0) };
605 }
606}
607
608impl Regex {
609 pub const CASE_INSENSITIVE: i32 = icu_ffi::UREGEX_CASE_INSENSITIVE;
611
612 pub const MULTILINE: i32 = icu_ffi::UREGEX_MULTILINE;
615
616 pub const LITERAL: i32 = icu_ffi::UREGEX_LITERAL;
618
619 pub unsafe fn new(pattern: &str, flags: i32, text: &Text) -> apperr::Result<Self> {
625 let f = init_if_needed()?;
626 unsafe {
627 let scratch = scratch_arena(None);
628 let mut utf16 = Vec::new_in(&*scratch);
629 let mut status = icu_ffi::U_ZERO_ERROR;
630
631 utf16.extend(pattern.encode_utf16());
632
633 let ptr = (f.uregex_open)(
634 utf16.as_ptr(),
635 utf16.len() as i32,
636 icu_ffi::UREGEX_MULTILINE | icu_ffi::UREGEX_ERROR_ON_UNKNOWN_ESCAPES | flags,
637 None,
638 &mut status,
639 );
640 (f.uregex_setTimeLimit)(ptr, 4096, &mut status);
644 (f.uregex_setUText)(ptr, text.0 as *const _ as *mut _, &mut status);
645 if status.is_failure() {
646 return Err(status.as_error());
647 }
648
649 Ok(Self(&mut *ptr))
650 }
651 }
652
653 pub unsafe fn set_text(&mut self, text: &mut Text, offset: usize) {
661 utext_access(text.0, offset as i64, true);
665
666 let f = assume_loaded();
667 let mut status = icu_ffi::U_ZERO_ERROR;
668 unsafe { (f.uregex_setUText)(self.0, text.0 as *const _ as *mut _, &mut status) };
669 unsafe { (f.uregex_reset64)(self.0, offset as i64, &mut status) };
672 }
673
674 pub fn reset(&mut self, offset: usize) {
676 let f = assume_loaded();
677 let mut status = icu_ffi::U_ZERO_ERROR;
678 unsafe { (f.uregex_reset64)(self.0, offset as i64, &mut status) };
679 }
680}
681
682impl Iterator for Regex {
683 type Item = Range<usize>;
684
685 fn next(&mut self) -> Option<Self::Item> {
686 let f = assume_loaded();
687
688 let mut status = icu_ffi::U_ZERO_ERROR;
689 let ok = unsafe { (f.uregex_findNext)(self.0, &mut status) };
690 if !ok {
691 return None;
692 }
693
694 let start = unsafe { (f.uregex_start64)(self.0, 0, &mut status) };
695 let end = unsafe { (f.uregex_end64)(self.0, 0, &mut status) };
696 if status.is_failure() {
697 return None;
698 }
699
700 let start = start.max(0);
701 let end = end.max(start);
702 Some(start as usize..end as usize)
703 }
704}
705
706static mut ROOT_COLLATOR: Option<*mut icu_ffi::UCollator> = None;
707
708pub fn compare_strings(a: &[u8], b: &[u8]) -> Ordering {
710 #[cold]
711 fn init() {
712 unsafe {
713 let mut coll = null_mut();
714
715 if let Ok(f) = init_if_needed() {
716 let mut status = icu_ffi::U_ZERO_ERROR;
717 coll = (f.ucol_open)(c"".as_ptr(), &mut status);
718 }
719
720 ROOT_COLLATOR = Some(coll);
721 }
722 }
723
724 #[allow(static_mut_refs)]
726 let coll = unsafe {
727 if ROOT_COLLATOR.is_none() {
728 init();
729 }
730 ROOT_COLLATOR.unwrap_unchecked()
731 };
732
733 if coll.is_null() {
734 compare_strings_ascii(a, b)
735 } else {
736 let f = assume_loaded();
737 let mut status = icu_ffi::U_ZERO_ERROR;
738 let res = unsafe {
739 (f.ucol_strcollUTF8)(
740 coll,
741 a.as_ptr(),
742 a.len() as i32,
743 b.as_ptr(),
744 b.len() as i32,
745 &mut status,
746 )
747 };
748
749 match res {
750 icu_ffi::UCollationResult::UCOL_EQUAL => Ordering::Equal,
751 icu_ffi::UCollationResult::UCOL_GREATER => Ordering::Greater,
752 icu_ffi::UCollationResult::UCOL_LESS => Ordering::Less,
753 }
754 }
755}
756
/// ASCII-only stand-in for ICU collation, used by `compare_strings` when no collator exists.
///
/// The result is decided by the following criteria, in decreasing priority:
/// 1. The first position at which the inputs differ when ignoring ASCII case.
/// 2. The length: a string sorts before any longer string that it is a
///    case-insensitive prefix of.
/// 3. The raw byte order at the first position where the inputs differ only in case.
///
/// Applying these criteria in a fixed order makes the result a consistent total order,
/// which sorting algorithms rely on. Comparing the first differing byte raw and only
/// the following bytes case-insensitively does not have that property
/// (it yields `"aZ" < "aa" < "AZ" < "aZ"`).
fn compare_strings_ascii(a: &[u8], b: &[u8]) -> Ordering {
    // The raw order at the first case-only difference. It is only used
    // if the strings turn out to be equal when ignoring case.
    let mut case_tie_breaker = Ordering::Equal;

    for (&ca, &cb) in a.iter().zip(b) {
        if ca == cb {
            continue;
        }

        let folded = ca.to_ascii_lowercase().cmp(&cb.to_ascii_lowercase());
        if folded != Ordering::Equal {
            return folded;
        }

        if case_tie_breaker == Ordering::Equal {
            case_tie_breaker = ca.cmp(&cb);
        }
    }

    // The common prefix is equal when ignoring case: the shorter string wins,
    // and for equal lengths the case difference decides.
    a.len().cmp(&b.len()).then(case_tie_breaker)
}
792
793static mut ROOT_CASEMAP: Option<*mut icu_ffi::UCaseMap> = None;
794
795pub fn fold_case<'a>(arena: &'a Arena, input: &str) -> ArenaString<'a> {
800 #[allow(static_mut_refs)]
802 let casemap = unsafe {
803 if ROOT_CASEMAP.is_none() {
804 ROOT_CASEMAP = Some(if let Ok(f) = init_if_needed() {
805 let mut status = icu_ffi::U_ZERO_ERROR;
806 (f.ucasemap_open)(null(), 0, &mut status)
807 } else {
808 null_mut()
809 })
810 }
811 ROOT_CASEMAP.unwrap_unchecked()
812 };
813
814 if !casemap.is_null() {
815 let f = assume_loaded();
816 let mut status = icu_ffi::U_ZERO_ERROR;
817 let mut output = Vec::new_in(arena);
818 let mut output_len;
819
820 {
823 output.reserve_exact(input.len() + 16);
824 let output = output.spare_capacity_mut();
825 output_len = unsafe {
826 (f.ucasemap_utf8FoldCase)(
827 casemap,
828 output.as_mut_ptr() as *mut _,
829 output.len() as i32,
830 input.as_ptr() as *const _,
831 input.len() as i32,
832 &mut status,
833 )
834 };
835 }
836
837 if status == icu_ffi::U_BUFFER_OVERFLOW_ERROR && output_len > 0 {
839 output.reserve_exact(output_len as usize);
840 let output = output.spare_capacity_mut();
841 output_len = unsafe {
842 (f.ucasemap_utf8FoldCase)(
843 casemap,
844 output.as_mut_ptr() as *mut _,
845 output.len() as i32,
846 input.as_ptr() as *const _,
847 input.len() as i32,
848 &mut status,
849 )
850 };
851 }
852
853 if status.is_success() && output_len > 0 {
854 unsafe {
855 output.set_len(output_len as usize);
856 }
857 return unsafe { ArenaString::from_utf8_unchecked(output) };
858 }
859 }
860
861 let mut result = ArenaString::from_str(arena, input);
862 for b in unsafe { result.as_bytes_mut() } {
863 b.make_ascii_lowercase();
864 }
865 result
866}
867
/// The table of ICU entry points that `init_if_needed` resolves at runtime.
///
/// `init_if_needed` fills this struct by writing one function pointer after another,
/// following the order of `LIBICUUC_PROC_NAMES` and then `LIBICUI18N_PROC_NAMES`.
/// The field order here must therefore match those two arrays exactly.
#[allow(non_snake_case)]
#[repr(C)]
struct LibraryFunctions {
    // Resolved via LIBICUUC_PROC_NAMES:
    u_errorName: icu_ffi::u_errorName,
    ucasemap_open: icu_ffi::ucasemap_open,
    ucasemap_utf8FoldCase: icu_ffi::ucasemap_utf8FoldCase,
    ucnv_getAvailableName: icu_ffi::ucnv_getAvailableName,
    ucnv_getStandardName: icu_ffi::ucnv_getStandardName,
    ucnv_open: icu_ffi::ucnv_open,
    ucnv_close: icu_ffi::ucnv_close,
    ucnv_convertEx: icu_ffi::ucnv_convertEx,
    utext_setup: icu_ffi::utext_setup,
    utext_close: icu_ffi::utext_close,

    // Resolved via LIBICUI18N_PROC_NAMES:
    ucol_open: icu_ffi::ucol_open,
    ucol_strcollUTF8: icu_ffi::ucol_strcollUTF8,
    uregex_open: icu_ffi::uregex_open,
    uregex_close: icu_ffi::uregex_close,
    uregex_setTimeLimit: icu_ffi::uregex_setTimeLimit,
    uregex_setUText: icu_ffi::uregex_setUText,
    uregex_reset64: icu_ffi::uregex_reset64,
    uregex_findNext: icu_ffi::uregex_findNext,
    uregex_start64: icu_ffi::uregex_start64,
    uregex_end64: icu_ffi::uregex_end64,
}
906
/// Symbols that `init_if_needed` looks up in the library returned by `sys::load_libicuuc`.
/// The order must match the first fields of `LibraryFunctions`.
const LIBICUUC_PROC_NAMES: [&CStr; 10] = [
    c"u_errorName",
    c"ucasemap_open",
    c"ucasemap_utf8FoldCase",
    c"ucnv_getAvailableName",
    c"ucnv_getStandardName",
    c"ucnv_open",
    c"ucnv_close",
    c"ucnv_convertEx",
    c"utext_setup",
    c"utext_close",
];
920
/// Symbols that `init_if_needed` looks up in the library returned by `sys::load_libicui18n`.
/// The order must match the remaining fields of `LibraryFunctions`.
const LIBICUI18N_PROC_NAMES: [&CStr; 10] = [
    c"ucol_open",
    c"ucol_strcollUTF8",
    c"uregex_open",
    c"uregex_close",
    c"uregex_setTimeLimit",
    c"uregex_setUText",
    c"uregex_reset64",
    c"uregex_findNext",
    c"uregex_start64",
    c"uregex_end64",
];
934
/// Tracks whether the ICU function table has been loaded.
enum LibraryFunctionsState {
    /// No load attempt has been made yet.
    Uninitialized,
    /// A load attempt was made and did not complete. It is not retried.
    Failed,
    /// All functions were resolved successfully.
    Loaded(LibraryFunctions),
}

/// Process-wide load state, written by `init_if_needed` and read by `assume_loaded`.
static mut LIBRARY_FUNCTIONS: LibraryFunctionsState = LibraryFunctionsState::Uninitialized;
942
943pub fn init() -> apperr::Result<()> {
944 init_if_needed()?;
945 Ok(())
946}
947
/// Returns the ICU function table, loading it on the first call.
///
/// A failed load is remembered: later calls return the error without retrying.
///
/// # Errors
///
/// Returns `APP_ICU_MISSING` if the libraries or any required function are unavailable.
#[allow(static_mut_refs)]
fn init_if_needed() -> apperr::Result<&'static LibraryFunctions> {
    /// Attempts to load both libraries and to resolve every function in the name tables.
    #[cold]
    fn load() {
        unsafe {
            // Assume failure up front, so that every early return below leaves
            // the state at `Failed` and the load is never attempted again.
            LIBRARY_FUNCTIONS = LibraryFunctionsState::Failed;

            let Ok(libicuuc) = sys::load_libicuuc() else {
                return;
            };
            let Ok(libicui18n) = sys::load_libicui18n() else {
                return;
            };

            // Stand-in type for "some function pointer", used to fill the table generically.
            type TransparentFunction = unsafe extern "C" fn() -> *const ();

            // The table is filled by writing function pointers sequentially (see below).
            // This requires the struct to consist of exactly one pointer per listed name.
            const _: () = assert!(
                mem::size_of::<LibraryFunctions>()
                    == mem::size_of::<TransparentFunction>()
                        * (LIBICUUC_PROC_NAMES.len() + LIBICUI18N_PROC_NAMES.len())
            );

            let mut funcs = MaybeUninit::<LibraryFunctions>::uninit();
            // Write cursor that moves over the fields of `funcs`, one pointer at a time.
            let mut ptr = funcs.as_mut_ptr() as *mut TransparentFunction;

            // On unix a suffix is obtained from the library and appended to each symbol name.
            #[cfg(unix)]
            let scratch_outer = scratch_arena(None);
            #[cfg(unix)]
            let suffix = sys::icu_proc_suffix(&scratch_outer, libicuuc);

            for (handle, names) in
                [(libicuuc, &LIBICUUC_PROC_NAMES[..]), (libicui18n, &LIBICUI18N_PROC_NAMES[..])]
            {
                for name in names {
                    #[cfg(unix)]
                    let scratch = scratch_arena(Some(&scratch_outer));
                    #[cfg(unix)]
                    let name = &sys::add_icu_proc_suffix(&scratch, name, &suffix);

                    let Ok(func) = sys::get_proc_address(handle, name) else {
                        // A missing symbol aborts the load. Debug builds additionally panic.
                        debug_assert!(
                            false,
                            "Failed to load ICU function: {}",
                            name.to_string_lossy()
                        );
                        return;
                    };

                    ptr.write(func);
                    ptr = ptr.add(1);
                }
            }

            // Every field has been written at this point.
            LIBRARY_FUNCTIONS = LibraryFunctionsState::Loaded(funcs.assume_init());
        }
    }

    unsafe {
        if matches!(&LIBRARY_FUNCTIONS, LibraryFunctionsState::Uninitialized) {
            load();
        }
    }

    match unsafe { &LIBRARY_FUNCTIONS } {
        LibraryFunctionsState::Loaded(f) => Ok(f),
        _ => Err(apperr::APP_ICU_MISSING),
    }
}
1025
1026#[allow(static_mut_refs)]
1027fn assume_loaded() -> &'static LibraryFunctions {
1028 match unsafe { &LIBRARY_FUNCTIONS } {
1029 LibraryFunctionsState::Loaded(f) => f,
1030 _ => unreachable!(),
1031 }
1032}
1033
mod icu_ffi {
    //! Hand-written declarations of the ICU C types, constants, and function
    //! signatures used by this file. The functions themselves are resolved at runtime.
    #![allow(dead_code, non_camel_case_types)]

    use std::ffi::{c_char, c_int, c_void};

    use crate::apperr;

    /// An ICU status code. Positive values are failures; zero and negative values are not.
    #[derive(Copy, Clone, Eq, PartialEq)]
    #[repr(transparent)]
    pub struct UErrorCode(c_int);

    impl UErrorCode {
        /// Wraps a raw status code.
        pub const fn new(code: u32) -> Self {
            Self(code as c_int)
        }

        /// True for zero and for negative codes.
        pub fn is_success(&self) -> bool {
            self.0 <= 0
        }

        /// True for positive codes.
        pub fn is_failure(&self) -> bool {
            self.0 > 0
        }

        /// Converts a failure code into an application error.
        /// Must only be called if `is_failure()` is true (checked in debug builds).
        pub fn as_error(&self) -> apperr::Error {
            debug_assert!(self.0 > 0);
            apperr::Error::new_icu(self.0 as u32)
        }
    }

    pub const U_ZERO_ERROR: UErrorCode = UErrorCode(0);
    pub const U_BUFFER_OVERFLOW_ERROR: UErrorCode = UErrorCode(15);
    pub const U_UNSUPPORTED_ERROR: UErrorCode = UErrorCode(16);

    pub type u_errorName = unsafe extern "C" fn(code: UErrorCode) -> *const c_char;

    /// Opaque ICU converter handle.
    pub struct UConverter;

    pub type ucnv_getAvailableName = unsafe extern "C" fn(n: i32) -> *const c_char;

    pub type ucnv_getStandardName = unsafe extern "C" fn(
        name: *const u8,
        standard: *const u8,
        status: &mut UErrorCode,
    ) -> *const c_char;

    pub type ucnv_open =
        unsafe extern "C" fn(converter_name: *const u8, status: &mut UErrorCode) -> *mut UConverter;

    pub type ucnv_close = unsafe extern "C" fn(converter: *mut UConverter);

    pub type ucnv_convertEx = unsafe extern "C" fn(
        target_cnv: *mut UConverter,
        source_cnv: *mut UConverter,
        target: *mut *mut u8,
        target_limit: *const u8,
        source: *mut *const u8,
        source_limit: *const u8,
        pivot_start: *mut u16,
        pivot_source: *mut *mut u16,
        pivot_target: *mut *mut u16,
        pivot_limit: *const u16,
        reset: bool,
        flush: bool,
        status: &mut UErrorCode,
    );

    /// Opaque ICU case map handle.
    pub struct UCaseMap;

    pub type ucasemap_open = unsafe extern "C" fn(
        locale: *const c_char,
        options: u32,
        status: &mut UErrorCode,
    ) -> *mut UCaseMap;

    pub type ucasemap_utf8FoldCase = unsafe extern "C" fn(
        csm: *const UCaseMap,
        dest: *mut c_char,
        dest_capacity: i32,
        src: *const c_char,
        src_length: i32,
        status: &mut UErrorCode,
    ) -> i32;

    /// Result of a collation comparison.
    #[repr(C)]
    pub enum UCollationResult {
        UCOL_EQUAL = 0,
        UCOL_GREATER = 1,
        UCOL_LESS = -1,
    }

    /// Opaque ICU collator handle.
    #[repr(C)]
    pub struct UCollator;

    pub type ucol_open =
        unsafe extern "C" fn(loc: *const c_char, status: &mut UErrorCode) -> *mut UCollator;

    pub type ucol_strcollUTF8 = unsafe extern "C" fn(
        coll: *mut UCollator,
        source: *const u8,
        source_length: i32,
        target: *const u8,
        target_length: i32,
        status: &mut UErrorCode,
    ) -> UCollationResult;

    // Signatures of the text provider callbacks stored in `UTextFuncs`.
    pub type UTextClone = unsafe extern "C" fn(
        dest: *mut UText,
        src: &UText,
        deep: bool,
        status: &mut UErrorCode,
    ) -> *mut UText;
    pub type UTextNativeLength = unsafe extern "C" fn(ut: &mut UText) -> i64;
    pub type UTextAccess =
        unsafe extern "C" fn(ut: &mut UText, native_index: i64, forward: bool) -> bool;
    pub type UTextExtract = unsafe extern "C" fn(
        ut: &mut UText,
        native_start: i64,
        native_limit: i64,
        dest: *mut u16,
        dest_capacity: i32,
        status: &mut UErrorCode,
    ) -> i32;
    pub type UTextReplace = unsafe extern "C" fn(
        ut: &mut UText,
        native_start: i64,
        native_limit: i64,
        replacement_text: *const u16,
        replacement_length: i32,
        status: &mut UErrorCode,
    ) -> i32;
    pub type UTextCopy = unsafe extern "C" fn(
        ut: &mut UText,
        native_start: i64,
        native_limit: i64,
        native_dest: i64,
        move_text: bool,
        status: &mut UErrorCode,
    );
    pub type UTextMapOffsetToNative = unsafe extern "C" fn(ut: &UText) -> i64;
    pub type UTextMapNativeIndexToUTF16 =
        unsafe extern "C" fn(ut: &UText, native_index: i64) -> i32;
    pub type UTextClose = unsafe extern "C" fn(ut: &mut UText);

    /// The callback table of a text provider. `None` entries are left unimplemented.
    #[repr(C)]
    pub struct UTextFuncs {
        /// Size of this struct in bytes.
        pub table_size: i32,
        pub reserved1: i32,
        pub reserved2: i32,
        pub reserved3: i32,
        pub clone: Option<UTextClone>,
        pub native_length: Option<UTextNativeLength>,
        pub access: Option<UTextAccess>,
        pub extract: Option<UTextExtract>,
        pub replace: Option<UTextReplace>,
        pub copy: Option<UTextCopy>,
        pub map_offset_to_native: Option<UTextMapOffsetToNative>,
        pub map_native_index_to_utf16: Option<UTextMapNativeIndexToUTF16>,
        pub close: Option<UTextClose>,
        pub spare1: Option<UTextClose>,
        pub spare2: Option<UTextClose>,
        pub spare3: Option<UTextClose>,
    }

    /// The text access object shared between ICU and the provider callbacks.
    /// The field order and types are part of the C ABI and must not be changed.
    #[repr(C)]
    pub struct UText {
        pub magic: u32,
        pub flags: i32,
        pub provider_properties: i32,
        pub size_of_struct: i32,
        /// Native (UTF-8) offset one past the end of the current chunk.
        pub chunk_native_limit: i64,
        pub extra_size: i32,
        /// Set by `utext_access` to the ASCII prefix length of the current chunk.
        pub native_indexing_limit: i32,
        /// Native (UTF-8) offset of the start of the current chunk.
        pub chunk_native_start: i64,
        /// Current UTF-16 position within the chunk.
        pub chunk_offset: i32,
        /// Number of UTF-16 code units in the chunk.
        pub chunk_length: i32,
        /// Pointer to the UTF-16 contents of the chunk.
        pub chunk_contents: *const u16,
        pub p_funcs: &'static UTextFuncs,
        /// Extra space requested via `utext_setup`; this file stores a `DoubleCache` there.
        pub p_extra: *mut c_void,
        /// Provider-defined; this file stores the `TextBuffer` pointer there.
        pub context: *mut c_void,
        pub p: *mut c_void,
        pub q: *mut c_void,
        pub r: *mut c_void,
        pub priv_p: *mut c_void,
        /// Provider-defined; this file stores the text buffer generation there.
        pub a: i64,
        pub b: i32,
        pub c: i32,
        pub priv_a: i64,
        pub priv_b: i32,
        pub priv_c: i32,
    }

    pub const UTEXT_MAGIC: u32 = 0x345ad82c;
    pub const UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE: i32 = 1;
    pub const UTEXT_PROVIDER_STABLE_CHUNKS: i32 = 2;
    pub const UTEXT_PROVIDER_WRITABLE: i32 = 3;
    pub const UTEXT_PROVIDER_HAS_META_DATA: i32 = 4;
    pub const UTEXT_PROVIDER_OWNS_TEXT: i32 = 5;

    pub type utext_setup = unsafe extern "C" fn(
        ut: *mut UText,
        extra_space: i32,
        status: &mut UErrorCode,
    ) -> *mut UText;
    pub type utext_close = unsafe extern "C" fn(ut: *mut UText) -> *mut UText;

    /// Location information for a pattern compilation error.
    #[repr(C)]
    pub struct UParseError {
        pub line: i32,
        pub offset: i32,
        pub pre_context: [u16; 16],
        pub post_context: [u16; 16],
    }

    /// Opaque ICU regular expression handle.
    #[repr(C)]
    pub struct URegularExpression;

    // Flags for `uregex_open`. They can be combined with bitwise OR.
    pub const UREGEX_UNIX_LINES: i32 = 1;
    pub const UREGEX_CASE_INSENSITIVE: i32 = 2;
    pub const UREGEX_COMMENTS: i32 = 4;
    pub const UREGEX_MULTILINE: i32 = 8;
    pub const UREGEX_LITERAL: i32 = 16;
    pub const UREGEX_DOTALL: i32 = 32;
    pub const UREGEX_UWORD: i32 = 256;
    pub const UREGEX_ERROR_ON_UNKNOWN_ESCAPES: i32 = 512;

    pub type uregex_open = unsafe extern "C" fn(
        pattern: *const u16,
        pattern_length: i32,
        flags: i32,
        pe: Option<&mut UParseError>,
        status: &mut UErrorCode,
    ) -> *mut URegularExpression;
    pub type uregex_close = unsafe extern "C" fn(regexp: *mut URegularExpression);
    pub type uregex_setTimeLimit =
        unsafe extern "C" fn(regexp: *mut URegularExpression, limit: i32, status: &mut UErrorCode);
    pub type uregex_setUText = unsafe extern "C" fn(
        regexp: *mut URegularExpression,
        text: *mut UText,
        status: &mut UErrorCode,
    );
    pub type uregex_reset64 =
        unsafe extern "C" fn(regexp: *mut URegularExpression, index: i64, status: &mut UErrorCode);
    pub type uregex_findNext =
        unsafe extern "C" fn(regexp: *mut URegularExpression, status: &mut UErrorCode) -> bool;
    pub type uregex_start64 = unsafe extern "C" fn(
        regexp: *mut URegularExpression,
        group_num: i32,
        status: &mut UErrorCode,
    ) -> i64;
    pub type uregex_end64 = unsafe extern "C" fn(
        regexp: *mut URegularExpression,
        group_num: i32,
        status: &mut UErrorCode,
    ) -> i64;
}
1291
#[cfg(test)]
mod tests {
    use super::*;

    /// Exercises the ASCII fallback comparator directly, without requiring ICU.
    #[test]
    fn test_compare_strings_ascii() {
        // Empty strings
        assert_eq!(compare_strings_ascii(b"", b""), Ordering::Equal);
        // Equal strings
        assert_eq!(compare_strings_ascii(b"hello", b"hello"), Ordering::Equal);
        // Different lengths: the shorter string sorts first
        assert_eq!(compare_strings_ascii(b"abc", b"abcd"), Ordering::Less);
        assert_eq!(compare_strings_ascii(b"abcd", b"abc"), Ordering::Greater);
        // Same letters, different case: the first case difference decides
        assert_eq!(compare_strings_ascii(b"AbC", b"aBc"), Ordering::Less);
        // Different letters and different case: the differing letter decides
        assert_eq!(compare_strings_ascii(b"hallo", b"Hello"), Ordering::Less);
        assert_eq!(compare_strings_ascii(b"Hello", b"hallo"), Ordering::Greater);
    }
}