unicode_shaper/
lib.rs

1#![no_std]
2#![cfg_attr(not(any(target_arch = "wasm32", feature = "wasm")), forbid(unsafe_code))]
3#![deny(missing_docs)]
4#![cfg_attr(docsrs, feature(doc_cfg))]
5//! # Unicode Shaper
6//!
7//! ## Description
//! The `unicode-shaper` Rust crate provides functionality to parse, shape, and process
//! Unicode text. This crate is `no_std` and is intended for use in
//! embedded systems and WASM applications.
11
12extern crate alloc;
13
14/// Unicode shaping tools
15pub mod shape;
16/// Unicode bidirectional tools
17pub mod ubidi;
18
19pub use shape::*;
20pub use ubidi::*;
21
22#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
23use alloc::vec::Vec;
24#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
25use core::mem;
26#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
27use lol_alloc::{AssumeSingleThreaded, FreeListAllocator};
28
/// Global allocator for the WASM build: a `FreeListAllocator` wrapped in
/// `AssumeSingleThreaded`.
// NOTE(review): the `feature = "wasm"` cfg also enables this allocator on
// non-wasm32 targets; confirm the single-threaded assumption holds there.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[global_allocator]
static ALLOCATOR: AssumeSingleThreaded<FreeListAllocator> = {
    // SAFETY: This application is single threaded, so using
    // AssumeSingleThreaded is allowed.
    unsafe { AssumeSingleThreaded::new(FreeListAllocator::new()) }
};
34
/// Panic handling for WASM applications
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
mod wasm_specific {
    use core::panic::PanicInfo;

    /// Crate-wide panic handler.
    ///
    /// The crate is `no_std`, so it has to supply its own handler. This one
    /// ignores the panic information and never returns: it spins in an empty
    /// loop.
    #[panic_handler]
    fn panic(_panic_info: &PanicInfo) -> ! {
        loop {}
    }
}
43
// External JavaScript function, declared here and provided by the host.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
extern "C" {
    /// Hands a buffer of `size` `u16` values starting at `ptr` to JavaScript.
    ///
    /// `processText` calls this with the pointer and length of its shaped
    /// result. That result is a local of `processText`, so the pointer
    /// presumably stops being valid once `processText` returns; the host
    /// should copy the data during this call (TODO confirm on the JS side).
    fn setUnicodeArray(ptr: *const u16, size: usize);
}
49
/// Shape a buffer of `u16` text and pass the result to JavaScript through
/// `setUnicodeArray`.
///
/// `input_ptr` and `len` describe the input buffer; `options` is forwarded
/// unchanged to `shape_unicode`. A null `input_ptr` or a `len` of zero is
/// treated as empty input.
///
/// # Safety
///
/// Unless `input_ptr` is null or `len` is zero, `input_ptr` must be properly
/// aligned and point to `len` initialized `u16` values that stay readable and
/// are not mutated for the duration of the call. This function is only
/// compiled for WASM builds.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[no_mangle]
pub unsafe extern "C" fn processText(input_ptr: *const u16, len: usize, options: u32) {
    // `slice::from_raw_parts` requires a non-null pointer even for a
    // zero-length slice, so map the null/empty cases to an empty slice
    // instead of building a slice from a pointer we cannot trust.
    let input_slice: &[u16] = if input_ptr.is_null() || len == 0 {
        &[]
    } else {
        // SAFETY: the pointer was just checked to be non-null; the caller
        // guarantees it is aligned and valid for `len` reads of `u16`.
        unsafe { core::slice::from_raw_parts(input_ptr, len) }
    };
    // Shape the input into a new buffer
    let result_vec = shape_unicode(input_slice, &options);
    // SAFETY: the pointer and length both come from `result_vec`, which stays
    // alive until this function returns, i.e. for the whole call.
    unsafe { setUnicodeArray(result_vec.as_ptr(), result_vec.len()) };
}
63
/// Allocate a zero-initialized buffer of `size` `u16` values and hand its
/// ownership to the caller as a raw pointer.
///
/// The buffer is not freed by Rust. The caller releases it by passing the
/// returned pointer and the same `size` to `free`. For a `size` of zero no
/// memory is allocated and the returned pointer must not be dereferenced.
///
/// # Safety
///
/// This function has no preconditions of its own; it is `unsafe` because it
/// is part of the raw-pointer WASM interface. The caller must release the
/// buffer exactly once, with the same `size`, and must not use the pointer
/// afterwards.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[no_mangle]
pub unsafe extern "C" fn allocUnicodeArray(size: usize) -> *mut u16 {
    // Zero-fill instead of calling `set_len` over uninitialized capacity, so
    // every element handed out is an initialized `u16`.
    let zeroed: Vec<u16> = alloc::vec![0; size];
    // A boxed slice holds exactly `size` elements, so the allocation's layout
    // is `Layout::array::<u16>(size)` -- the layout `free` deallocates with.
    // `ManuallyDrop` stops the buffer from being deallocated when it goes out
    // of scope.
    let mut buffer = mem::ManuallyDrop::new(zeroed.into_boxed_slice());

    buffer.as_mut_ptr()
}
82
/// Release a buffer previously returned by `allocUnicodeArray`.
///
/// A null `ptr` or a `size` of zero is a no-op: nothing was allocated in
/// either case, so there is nothing to release.
///
/// # Safety
///
/// Unless `ptr` is null or `size` is zero, `ptr` must have been returned by
/// `allocUnicodeArray(size)` with this same `size`, must not have been freed
/// already, and must not be used after this call.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[no_mangle]
pub unsafe extern "C" fn free(ptr: *mut u16, size: usize) {
    // A zero-sized request never allocates (the pointer handed out is
    // dangling), and null is never a live allocation; passing either to
    // `dealloc` would violate its contract.
    if ptr.is_null() || size == 0 {
        return;
    }

    // `Layout::array` only fails when the byte size overflows, and no
    // allocation with such a layout can exist, so there is nothing to free.
    if let Ok(layout) = alloc::alloc::Layout::array::<u16>(size) {
        // SAFETY: the caller guarantees `ptr` is a live allocation made for
        // exactly `size` `u16` values, which is the layout computed above.
        unsafe { alloc::alloc::dealloc(ptr.cast::<u8>(), layout) };
    }
}
95
/// WASM export: report whether a `u16` character is RTL.
///
/// Thin wrapper that forwards `input` by reference to `is_rtl` and returns
/// its answer unchanged.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[no_mangle]
pub extern "C" fn isRTL(input: u16) -> bool {
    let character = input;
    is_rtl(&character)
}
102
/// WASM export: report whether a `u16` character is CJK.
///
/// Thin wrapper that forwards `input` by reference to `is_cjk` and returns
/// its answer unchanged.
#[cfg(any(target_arch = "wasm32", feature = "wasm"))]
#[no_mangle]
pub extern "C" fn isCJK(input: u16) -> bool {
    is_cjk(&input)
}