shape_runtime/stdlib/
unicode.rs1use crate::marshal::{register_typed_fn_1, register_typed_fn_2};
12use crate::module_exports::ModuleExports;
13use crate::typed_module_exports::{ConcreteReturn, ConcreteType, TypedReturn};
14use std::sync::Arc;
15
16pub fn create_unicode_module() -> ModuleExports {
18 let mut module = ModuleExports::new("std::core::unicode");
19 module.description = "Unicode text processing utilities".to_string();
20
21 register_typed_fn_2::<_, Arc<String>, Arc<String>>(
23 &mut module,
24 "normalize",
25 "Normalize a Unicode string to the specified form",
26 [("text", "string"), ("form", "string")],
27 ConcreteType::String,
28 |text, form, _ctx| {
29 use unicode_normalization::UnicodeNormalization;
30
31 let normalized: String = match form.as_str() {
32 "NFC" => text.nfc().collect(),
33 "NFD" => text.nfd().collect(),
34 "NFKC" => text.nfkc().collect(),
35 "NFKD" => text.nfkd().collect(),
36 _ => {
37 return Err(format!(
38 "unicode.normalize(): unknown form '{}', expected NFC/NFD/NFKC/NFKD",
39 form.as_str()
40 ));
41 }
42 };
43
44 Ok(TypedReturn::Concrete(ConcreteReturn::String(normalized)))
45 },
46 );
47
48 register_typed_fn_1::<_, i64>(
50 &mut module,
51 "category",
52 "Get the Unicode general category of a codepoint",
53 "codepoint",
54 "int",
55 ConcreteType::String,
56 |cp, _ctx| {
57 let ch = char::from_u32(cp as u32)
58 .ok_or_else(|| format!("unicode.category(): invalid codepoint {}", cp))?;
59
60 Ok(TypedReturn::Concrete(ConcreteReturn::String(
61 unicode_general_category(ch).to_string(),
62 )))
63 },
64 );
65
66 register_typed_fn_1::<_, Arc<String>>(
68 &mut module,
69 "is_letter",
70 "Check if the first character is a Unicode letter",
71 "char",
72 "string",
73 ConcreteType::Bool,
74 |s, _ctx| {
75 Ok(TypedReturn::Concrete(ConcreteReturn::Bool(
76 s.chars().next().map_or(false, |c| c.is_alphabetic()),
77 )))
78 },
79 );
80
81 register_typed_fn_1::<_, Arc<String>>(
83 &mut module,
84 "is_digit",
85 "Check if the first character is a Unicode digit",
86 "char",
87 "string",
88 ConcreteType::Bool,
89 |s, _ctx| {
90 Ok(TypedReturn::Concrete(ConcreteReturn::Bool(
91 s.chars().next().map_or(false, |c| c.is_numeric()),
92 )))
93 },
94 );
95
96 register_typed_fn_1::<_, Arc<String>>(
98 &mut module,
99 "graphemes",
100 "Split a string into Unicode grapheme clusters",
101 "text",
102 "string",
103 ConcreteType::ArrayString,
104 |text, _ctx| {
105 use unicode_segmentation::UnicodeSegmentation;
106
107 let clusters: Vec<String> = text.graphemes(true).map(|g| g.to_string()).collect();
108 Ok(TypedReturn::Concrete(ConcreteReturn::ArrayString(clusters)))
109 },
110 );
111
112 module
113}
114
/// Returns an approximate two-letter Unicode general-category code for `ch`.
///
/// The classification uses only the predicates on `char` from the standard
/// library, so it is a heuristic and not a lookup in the Unicode Character
/// Database:
///
/// - ASCII punctuation and symbols are mapped to their exact categories
///   (`Ps`, `Pe`, `Pd`, `Pc`, `Sc`, `Sk`, `Sm`, `Po`).
/// - Control characters are reported as `Cc` even when they also count as
///   whitespace (tab, line feed, carriage return, U+0085).
/// - U+2028 and U+2029 are reported as `Zl` and `Zp`; all other whitespace is
///   reported as `Zs`.
/// - Letters are split into `Lu` / `Ll` / `Lo` by `is_uppercase`,
///   `is_lowercase` and `is_alphabetic`; titlecase and modifier letters are
///   therefore not distinguished.
/// - Only ASCII digits are reported as `Nd`; every other numeric character is
///   reported as `No`.
/// - Anything not matched above (including non-ASCII punctuation and symbols)
///   falls back to `Cn`.
fn unicode_general_category(ch: char) -> &'static str {
    match ch {
        // Exact categories that the generic predicates below cannot express.
        // None of these characters satisfies a letter/number predicate, so
        // listing them first does not shadow any later arm.
        '\u{2028}' => "Zl",
        '\u{2029}' => "Zp",
        '(' | '[' | '{' => "Ps",
        ')' | ']' | '}' => "Pe",
        '-' => "Pd",
        '_' => "Pc",
        '$' => "Sc",
        '^' | '`' => "Sk",
        '+' | '<' | '=' | '>' | '|' | '~' => "Sm",
        c if c.is_uppercase() => "Lu",
        c if c.is_lowercase() => "Ll",
        c if c.is_alphabetic() => "Lo",
        c if c.is_ascii_digit() => "Nd",
        c if c.is_numeric() => "No",
        // Control must be tested before whitespace: '\t', '\n', '\r' and
        // U+0085 satisfy both predicates and their general category is Cc.
        c if c.is_control() => "Cc",
        c if c.is_whitespace() => "Zs",
        c if c.is_ascii_punctuation() => "Po",
        _ => "Cn",
    }
}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Every function the unicode module is expected to export.
    const EXPECTED_EXPORTS: [&str; 5] = [
        "normalize",
        "category",
        "is_letter",
        "is_digit",
        "graphemes",
    ];

    /// The module carries its canonical name and exposes each expected export.
    #[test]
    fn test_unicode_module_creation() {
        let unicode = create_unicode_module();
        assert_eq!(unicode.name, "std::core::unicode");
        for &export in EXPECTED_EXPORTS.iter() {
            assert!(unicode.has_export(export));
        }
    }

    /// The typed registry holds exactly the expected functions and no others.
    #[test]
    fn test_unicode_typed_registry_populated() {
        let unicode = create_unicode_module();
        let registry = unicode.typed_exports();
        for &export in EXPECTED_EXPORTS.iter() {
            assert!(registry.get(export).is_some());
        }
        assert_eq!(registry.functions.len(), EXPECTED_EXPORTS.len());
    }
}