1use oxilean_kernel::{BinderInfo, Declaration, Environment, Expr, Level, Name};
6
7use super::types::{
8 CharCategory, CharInfo, CharPredicateTable, CharRange, CharScanner, UnicodeBlocks,
9};
10
11pub fn build_char_env(env: &mut Environment) -> Result<(), String> {
13 let type1 = Expr::Sort(Level::succ(Level::zero()));
14 env.add(Declaration::Axiom {
15 name: Name::str("Char"),
16 univ_params: vec![],
17 ty: type1.clone(),
18 })
19 .map_err(|e| e.to_string())?;
20 let of_nat_ty = Expr::Pi(
21 oxilean_kernel::BinderInfo::Default,
22 Name::str("n"),
23 Box::new(Expr::Const(Name::str("Nat"), vec![])),
24 Box::new(Expr::Const(Name::str("Char"), vec![])),
25 );
26 env.add(Declaration::Axiom {
27 name: Name::str("Char.ofNat"),
28 univ_params: vec![],
29 ty: of_nat_ty,
30 })
31 .map_err(|e| e.to_string())?;
32 let to_nat_ty = Expr::Pi(
33 oxilean_kernel::BinderInfo::Default,
34 Name::str("c"),
35 Box::new(Expr::Const(Name::str("Char"), vec![])),
36 Box::new(Expr::Const(Name::str("Nat"), vec![])),
37 );
38 env.add(Declaration::Axiom {
39 name: Name::str("Char.toNat"),
40 univ_params: vec![],
41 ty: to_nat_ty,
42 })
43 .map_err(|e| e.to_string())?;
44 let is_alpha_ty = Expr::Pi(
45 oxilean_kernel::BinderInfo::Default,
46 Name::str("c"),
47 Box::new(Expr::Const(Name::str("Char"), vec![])),
48 Box::new(Expr::Const(Name::str("Bool"), vec![])),
49 );
50 env.add(Declaration::Axiom {
51 name: Name::str("Char.isAlpha"),
52 univ_params: vec![],
53 ty: is_alpha_ty,
54 })
55 .map_err(|e| e.to_string())?;
56 let is_digit_ty = Expr::Pi(
57 oxilean_kernel::BinderInfo::Default,
58 Name::str("c"),
59 Box::new(Expr::Const(Name::str("Char"), vec![])),
60 Box::new(Expr::Const(Name::str("Bool"), vec![])),
61 );
62 env.add(Declaration::Axiom {
63 name: Name::str("Char.isDigit"),
64 univ_params: vec![],
65 ty: is_digit_ty,
66 })
67 .map_err(|e| e.to_string())?;
68 build_char_predicates(env)?;
69 build_char_conversions(env)?;
70 build_char_comparisons(env)?;
71 Ok(())
72}
73pub fn build_char_predicates(env: &mut Environment) -> Result<(), String> {
75 let char_to_bool = Expr::Pi(
76 BinderInfo::Default,
77 Name::str("c"),
78 Box::new(Expr::Const(Name::str("Char"), vec![])),
79 Box::new(Expr::Const(Name::str("Bool"), vec![])),
80 );
81 let predicates = [
82 "Char.isUpper",
83 "Char.isLower",
84 "Char.isAlphaNum",
85 "Char.isWhitespace",
86 "Char.isPunctuation",
87 "Char.isAscii",
88 "Char.isControl",
89 "Char.isPrint",
90 "Char.isHexDigit",
91 "Char.isOctDigit",
92 ];
93 for pred_name in predicates {
94 env.add(Declaration::Axiom {
95 name: Name::str(pred_name),
96 univ_params: vec![],
97 ty: char_to_bool.clone(),
98 })
99 .map_err(|e| e.to_string())?;
100 }
101 Ok(())
102}
103pub fn build_char_conversions(env: &mut Environment) -> Result<(), String> {
105 let char_to_char = Expr::Pi(
106 BinderInfo::Default,
107 Name::str("c"),
108 Box::new(Expr::Const(Name::str("Char"), vec![])),
109 Box::new(Expr::Const(Name::str("Char"), vec![])),
110 );
111 env.add(Declaration::Axiom {
112 name: Name::str("Char.toUpper"),
113 univ_params: vec![],
114 ty: char_to_char.clone(),
115 })
116 .map_err(|e| e.to_string())?;
117 env.add(Declaration::Axiom {
118 name: Name::str("Char.toLower"),
119 univ_params: vec![],
120 ty: char_to_char.clone(),
121 })
122 .map_err(|e| e.to_string())?;
123 let char_to_nat = Expr::Pi(
124 BinderInfo::Default,
125 Name::str("c"),
126 Box::new(Expr::Const(Name::str("Char"), vec![])),
127 Box::new(Expr::Const(Name::str("Nat"), vec![])),
128 );
129 env.add(Declaration::Axiom {
130 name: Name::str("Char.digitToNat"),
131 univ_params: vec![],
132 ty: char_to_nat.clone(),
133 })
134 .map_err(|e| e.to_string())?;
135 env.add(Declaration::Axiom {
136 name: Name::str("Char.hexDigitToNat"),
137 univ_params: vec![],
138 ty: char_to_nat.clone(),
139 })
140 .map_err(|e| e.to_string())?;
141 env.add(Declaration::Axiom {
142 name: Name::str("Char.utf8Size"),
143 univ_params: vec![],
144 ty: char_to_nat,
145 })
146 .map_err(|e| e.to_string())?;
147 Ok(())
148}
149pub fn build_char_comparisons(env: &mut Environment) -> Result<(), String> {
151 let char_char_to_bool = Expr::Pi(
152 BinderInfo::Default,
153 Name::str("a"),
154 Box::new(Expr::Const(Name::str("Char"), vec![])),
155 Box::new(Expr::Pi(
156 BinderInfo::Default,
157 Name::str("b"),
158 Box::new(Expr::Const(Name::str("Char"), vec![])),
159 Box::new(Expr::Const(Name::str("Bool"), vec![])),
160 )),
161 );
162 env.add(Declaration::Axiom {
163 name: Name::str("Char.beq"),
164 univ_params: vec![],
165 ty: char_char_to_bool.clone(),
166 })
167 .map_err(|e| e.to_string())?;
168 env.add(Declaration::Axiom {
169 name: Name::str("Char.blt"),
170 univ_params: vec![],
171 ty: char_char_to_bool.clone(),
172 })
173 .map_err(|e| e.to_string())?;
174 env.add(Declaration::Axiom {
175 name: Name::str("Char.ble"),
176 univ_params: vec![],
177 ty: char_char_to_bool,
178 })
179 .map_err(|e| e.to_string())?;
180 Ok(())
181}
182pub fn unicode_category(c: char) -> CharCategory {
184 if c.is_uppercase() {
185 CharCategory::UppercaseLetter
186 } else if c.is_lowercase() {
187 CharCategory::LowercaseLetter
188 } else if c.is_ascii_digit() {
189 CharCategory::DecimalNumber
190 } else if c.is_ascii_whitespace() || c == '\u{00A0}' {
191 CharCategory::SpaceSeparator
192 } else if c.is_control() {
193 CharCategory::Control
194 } else if c.is_ascii_punctuation() {
195 CharCategory::ConnectorPunctuation
196 } else if c == '£' || c == '$' || c == '€' || c == '¥' {
197 CharCategory::CurrencySymbol
198 } else if c == '+' || c == '-' || c == '=' || c == '<' || c == '>' {
199 CharCategory::MathSymbol
200 } else {
201 CharCategory::Unknown
202 }
203}
204pub fn is_ascii_alpha(c: char) -> bool {
206 c.is_ascii_alphabetic()
207}
208pub fn is_ascii_alnum(c: char) -> bool {
210 c.is_ascii_alphanumeric()
211}
212pub fn is_valid_unicode(c: char) -> bool {
214 let cp = c as u32;
215 cp <= 0x10FFFF
216}
217pub fn is_hex_digit(c: char) -> bool {
219 c.is_ascii_hexdigit()
220}
221pub fn is_printable_ascii(c: char) -> bool {
223 c.is_ascii() && !c.is_control()
224}
225pub fn is_unicode_letter(c: char) -> bool {
227 c.is_alphabetic()
228}
229pub fn utf8_encoded_len(c: char) -> usize {
231 c.len_utf8()
232}
233pub fn utf16_encoded_len(c: char) -> usize {
235 c.len_utf16()
236}
237pub fn utf8_encode(c: char) -> ([u8; 4], usize) {
239 let mut buf = [0u8; 4];
240 let len = c.encode_utf8(&mut buf).len();
241 (buf, len)
242}
243pub fn utf16_encode(c: char) -> ([u16; 2], usize) {
245 let mut buf = [0u16; 2];
246 let len = c.encode_utf16(&mut buf).len();
247 (buf, len)
248}
249pub fn utf8_decode_first(bytes: &[u8]) -> Option<(char, usize)> {
252 if bytes.is_empty() {
253 return None;
254 }
255 let s = std::str::from_utf8(bytes).ok()?;
256 let c = s.chars().next()?;
257 Some((c, c.len_utf8()))
258}
259pub fn ascii_digit_value(c: char) -> Option<u32> {
262 if c.is_ascii_digit() {
263 Some(c as u32 - b'0' as u32)
264 } else {
265 None
266 }
267}
268pub fn hex_digit_value(c: char) -> Option<u32> {
271 match c {
272 '0'..='9' => Some(c as u32 - b'0' as u32),
273 'a'..='f' => Some(c as u32 - b'a' as u32 + 10),
274 'A'..='F' => Some(c as u32 - b'A' as u32 + 10),
275 _ => None,
276 }
277}
278pub fn from_code_point(cp: u32) -> Option<char> {
280 char::from_u32(cp)
281}
282pub fn to_code_point(c: char) -> u32 {
284 c as u32
285}
286pub fn to_titlecase_first(c: char) -> char {
288 c.to_uppercase().next().unwrap_or(c)
289}
290pub fn case_fold(c: char) -> char {
292 c.to_lowercase().next().unwrap_or(c)
293}
294pub fn latin_alphabet(uppercase: bool) -> Vec<char> {
296 if uppercase {
297 ('A'..='Z').collect()
298 } else {
299 ('a'..='z').collect()
300 }
301}
302pub fn ascii_digits() -> Vec<char> {
304 ('0'..='9').collect()
305}
306pub fn is_ident_start(c: char) -> bool {
308 c == '_' || c.is_alphabetic()
309}
310pub fn is_ident_continue(c: char) -> bool {
312 c == '_' || c.is_alphanumeric() || c == '\''
313}
314pub fn is_oxilean_name_char(c: char) -> bool {
316 c.is_alphanumeric() || c == '_' || c == '.' || c == '\''
317}
318pub fn escape_char(c: char) -> String {
320 match c {
321 '\n' => "\\n".to_string(),
322 '\t' => "\\t".to_string(),
323 '\r' => "\\r".to_string(),
324 '\\' => "\\\\".to_string(),
325 '\'' => "\\'".to_string(),
326 '"' => "\\\"".to_string(),
327 '\0' => "\\0".to_string(),
328 c if c.is_control() => format!("\\u{{{:04X}}}", c as u32),
329 c => c.to_string(),
330 }
331}
332pub fn unescape_char(c: char) -> Option<char> {
335 match c {
336 'n' => Some('\n'),
337 't' => Some('\t'),
338 'r' => Some('\r'),
339 '\\' => Some('\\'),
340 '\'' => Some('\''),
341 '"' => Some('"'),
342 '0' => Some('\0'),
343 _ => None,
344 }
345}
346pub fn make_char_of_nat(n_expr: Expr) -> Expr {
348 Expr::App(
349 Box::new(Expr::Const(Name::str("Char.ofNat"), vec![])),
350 Box::new(n_expr),
351 )
352}
353pub fn make_char_to_nat(c_expr: Expr) -> Expr {
355 Expr::App(
356 Box::new(Expr::Const(Name::str("Char.toNat"), vec![])),
357 Box::new(c_expr),
358 )
359}
360pub fn make_char_literal(code_point: u32) -> Expr {
362 let nat_lit = Expr::Lit(oxilean_kernel::Literal::Nat(code_point.into()));
363 make_char_of_nat(nat_lit)
364}
365pub fn decode_unicode_escape(hex: &str) -> Option<char> {
367 let cp = u32::from_str_radix(hex.trim_start_matches('{').trim_end_matches('}'), 16).ok()?;
368 char::from_u32(cp)
369}
370pub fn registered_char_names(env: &Environment) -> Vec<String> {
372 let candidates = [
373 "Char",
374 "Char.ofNat",
375 "Char.toNat",
376 "Char.isAlpha",
377 "Char.isDigit",
378 "Char.isUpper",
379 "Char.isLower",
380 "Char.isAlphaNum",
381 "Char.isWhitespace",
382 "Char.isPunctuation",
383 "Char.isAscii",
384 "Char.isControl",
385 "Char.isPrint",
386 "Char.isHexDigit",
387 "Char.isOctDigit",
388 "Char.toUpper",
389 "Char.toLower",
390 "Char.digitToNat",
391 "Char.hexDigitToNat",
392 "Char.utf8Size",
393 "Char.beq",
394 "Char.blt",
395 "Char.ble",
396 ];
397 candidates
398 .iter()
399 .filter(|n| env.get(&Name::str(**n)).is_some())
400 .map(|s| s.to_string())
401 .collect()
402}
403#[cfg(test)]
404mod tests {
405 use super::*;
406 fn setup_env() -> Environment {
407 let mut env = Environment::new();
408 let type1 = Expr::Sort(Level::succ(Level::zero()));
409 env.add(Declaration::Axiom {
410 name: Name::str("Nat"),
411 univ_params: vec![],
412 ty: type1.clone(),
413 })
414 .expect("operation should succeed");
415 env.add(Declaration::Axiom {
416 name: Name::str("Bool"),
417 univ_params: vec![],
418 ty: type1,
419 })
420 .expect("operation should succeed");
421 env
422 }
423 #[test]
424 fn test_build_char_env() {
425 let mut env = setup_env();
426 assert!(build_char_env(&mut env).is_ok());
427 assert!(env.get(&Name::str("Char")).is_some());
428 assert!(env.get(&Name::str("Char.ofNat")).is_some());
429 assert!(env.get(&Name::str("Char.toNat")).is_some());
430 }
431 #[test]
432 fn test_char_is_alpha() {
433 let mut env = setup_env();
434 build_char_env(&mut env).expect("build_char_env should succeed");
435 let decl = env
436 .get(&Name::str("Char.isAlpha"))
437 .expect("declaration 'Char.isAlpha' should exist in env");
438 assert!(matches!(decl, Declaration::Axiom { .. }));
439 }
440 #[test]
441 fn test_char_is_digit() {
442 let mut env = setup_env();
443 build_char_env(&mut env).expect("build_char_env should succeed");
444 let decl = env
445 .get(&Name::str("Char.isDigit"))
446 .expect("declaration 'Char.isDigit' should exist in env");
447 assert!(matches!(decl, Declaration::Axiom { .. }));
448 }
449 #[test]
450 fn test_char_predicates_registered() {
451 let mut env = setup_env();
452 build_char_env(&mut env).expect("build_char_env should succeed");
453 assert!(env.get(&Name::str("Char.isUpper")).is_some());
454 assert!(env.get(&Name::str("Char.isLower")).is_some());
455 assert!(env.get(&Name::str("Char.isAlphaNum")).is_some());
456 assert!(env.get(&Name::str("Char.isWhitespace")).is_some());
457 assert!(env.get(&Name::str("Char.isAscii")).is_some());
458 }
459 #[test]
460 fn test_char_conversions_registered() {
461 let mut env = setup_env();
462 build_char_env(&mut env).expect("build_char_env should succeed");
463 assert!(env.get(&Name::str("Char.toUpper")).is_some());
464 assert!(env.get(&Name::str("Char.toLower")).is_some());
465 assert!(env.get(&Name::str("Char.digitToNat")).is_some());
466 assert!(env.get(&Name::str("Char.hexDigitToNat")).is_some());
467 assert!(env.get(&Name::str("Char.utf8Size")).is_some());
468 }
469 #[test]
470 fn test_char_comparisons_registered() {
471 let mut env = setup_env();
472 build_char_env(&mut env).expect("build_char_env should succeed");
473 assert!(env.get(&Name::str("Char.beq")).is_some());
474 assert!(env.get(&Name::str("Char.blt")).is_some());
475 assert!(env.get(&Name::str("Char.ble")).is_some());
476 }
477 #[test]
478 fn test_unicode_category_letter() {
479 assert_eq!(unicode_category('A'), CharCategory::UppercaseLetter);
480 assert_eq!(unicode_category('z'), CharCategory::LowercaseLetter);
481 }
482 #[test]
483 fn test_unicode_category_digit() {
484 assert_eq!(unicode_category('5'), CharCategory::DecimalNumber);
485 }
486 #[test]
487 fn test_unicode_category_whitespace() {
488 assert_eq!(unicode_category(' '), CharCategory::SpaceSeparator);
489 }
490 #[test]
491 fn test_utf8_encoded_len_ascii() {
492 assert_eq!(utf8_encoded_len('A'), 1);
493 assert_eq!(utf8_encoded_len('\n'), 1);
494 }
495 #[test]
496 fn test_utf8_encoded_len_multibyte() {
497 assert_eq!(utf8_encoded_len('€'), 3);
498 assert_eq!(utf8_encoded_len('𝄞'), 4);
499 }
500 #[test]
501 fn test_hex_digit_value() {
502 assert_eq!(hex_digit_value('0'), Some(0));
503 assert_eq!(hex_digit_value('9'), Some(9));
504 assert_eq!(hex_digit_value('a'), Some(10));
505 assert_eq!(hex_digit_value('F'), Some(15));
506 assert_eq!(hex_digit_value('g'), None);
507 assert_eq!(hex_digit_value('z'), None);
508 }
509 #[test]
510 fn test_ascii_digit_value() {
511 assert_eq!(ascii_digit_value('0'), Some(0));
512 assert_eq!(ascii_digit_value('9'), Some(9));
513 assert_eq!(ascii_digit_value('a'), None);
514 }
515 #[test]
516 fn test_from_code_point() {
517 assert_eq!(from_code_point(65), Some('A'));
518 assert_eq!(from_code_point(0x1F600), Some('😀'));
519 assert_eq!(from_code_point(0xD800), None);
520 }
521 #[test]
522 fn test_to_code_point() {
523 assert_eq!(to_code_point('A'), 65);
524 assert_eq!(to_code_point('\0'), 0);
525 }
526 #[test]
527 fn test_escape_char() {
528 assert_eq!(escape_char('\n'), "\\n");
529 assert_eq!(escape_char('\t'), "\\t");
530 assert_eq!(escape_char('A'), "A");
531 assert_eq!(escape_char('\\'), "\\\\");
532 }
533 #[test]
534 fn test_unescape_char() {
535 assert_eq!(unescape_char('n'), Some('\n'));
536 assert_eq!(unescape_char('t'), Some('\t'));
537 assert_eq!(unescape_char('z'), None);
538 }
539 #[test]
540 fn test_char_info_new() {
541 let info = CharInfo::new('A');
542 assert_eq!(info.ch, 'A');
543 assert_eq!(info.code_point, 65);
544 assert_eq!(info.utf8_len, 1);
545 assert!(info.is_ascii);
546 assert!(info.is_letter());
547 assert!(!info.is_digit());
548 }
549 #[test]
550 fn test_char_info_digit() {
551 let info = CharInfo::new('7');
552 assert!(info.is_digit());
553 assert!(!info.is_letter());
554 }
555 #[test]
556 fn test_char_info_whitespace() {
557 let info = CharInfo::new(' ');
558 assert!(info.is_whitespace());
559 }
560 #[test]
561 fn test_predicate_table_lookup() {
562 let table = CharPredicateTable::new();
563 let pred = table.lookup("isAlpha");
564 assert!(pred.is_some());
565 assert!(pred.expect("pred should be valid")('A'));
566 assert!(!pred.expect("pred should be valid")('1'));
567 }
568 #[test]
569 fn test_predicate_table_apply() {
570 let table = CharPredicateTable::new();
571 assert_eq!(table.apply("isDigit", '5'), Some(true));
572 assert_eq!(table.apply("isDigit", 'a'), Some(false));
573 assert_eq!(table.apply("nonexistent", 'a'), None);
574 }
575 #[test]
576 fn test_predicate_table_names() {
577 let table = CharPredicateTable::new();
578 let names = table.names();
579 assert!(names.contains(&"isAlpha"));
580 assert!(names.contains(&"isDigit"));
581 assert!(names.contains(&"isUpper"));
582 assert!(names.len() >= 10);
583 }
584 #[test]
585 fn test_is_ident_start() {
586 assert!(is_ident_start('a'));
587 assert!(is_ident_start('_'));
588 assert!(is_ident_start('Z'));
589 assert!(!is_ident_start('1'));
590 assert!(!is_ident_start(' '));
591 }
592 #[test]
593 fn test_is_ident_continue() {
594 assert!(is_ident_continue('a'));
595 assert!(is_ident_continue('_'));
596 assert!(is_ident_continue('1'));
597 assert!(is_ident_continue('\''));
598 assert!(!is_ident_continue(' '));
599 assert!(!is_ident_continue('.'));
600 }
601 #[test]
602 fn test_latin_alphabet() {
603 let upper = latin_alphabet(true);
604 assert_eq!(upper.len(), 26);
605 assert_eq!(upper[0], 'A');
606 assert_eq!(upper[25], 'Z');
607 let lower = latin_alphabet(false);
608 assert_eq!(lower.len(), 26);
609 assert_eq!(lower[0], 'a');
610 }
611 #[test]
612 fn test_ascii_digits_vec() {
613 let digits = ascii_digits();
614 assert_eq!(digits.len(), 10);
615 assert_eq!(digits[0], '0');
616 assert_eq!(digits[9], '9');
617 }
618 #[test]
619 fn test_utf8_decode_first_ascii() {
620 let bytes = b"Hello";
621 let result = utf8_decode_first(bytes);
622 assert_eq!(result, Some(('H', 1)));
623 }
624 #[test]
625 fn test_utf8_decode_first_multibyte() {
626 let c = '€';
627 let mut buf = [0u8; 4];
628 let s = c.encode_utf8(&mut buf);
629 let bytes = s.as_bytes();
630 let result = utf8_decode_first(bytes);
631 assert_eq!(result, Some(('€', 3)));
632 }
633 #[test]
634 fn test_decode_unicode_escape() {
635 assert_eq!(decode_unicode_escape("0041"), Some('A'));
636 assert_eq!(decode_unicode_escape("{0041}"), Some('A'));
637 assert_eq!(decode_unicode_escape("1F600"), Some('😀'));
638 }
639 #[test]
640 fn test_registered_char_names() {
641 let mut env = setup_env();
642 build_char_env(&mut env).expect("build_char_env should succeed");
643 let names = registered_char_names(&env);
644 assert!(names.contains(&"Char".to_string()));
645 assert!(names.contains(&"Char.ofNat".to_string()));
646 assert!(names.len() >= 5);
647 }
648 #[test]
649 fn test_make_char_literal() {
650 let expr = make_char_literal(65);
651 assert!(matches!(expr, Expr::App(_, _)));
652 }
653 #[test]
654 fn test_is_valid_unicode() {
655 assert!(is_valid_unicode('A'));
656 assert!(is_valid_unicode('😀'));
657 assert!(is_valid_unicode('\0'));
658 }
659 #[test]
660 fn test_case_fold() {
661 assert_eq!(case_fold('A'), 'a');
662 assert_eq!(case_fold('z'), 'z');
663 assert_eq!(case_fold('5'), '5');
664 }
665}
666#[allow(dead_code)]
668pub fn normalize_whitespace(s: &str) -> String {
669 s.chars()
670 .map(|c| if c.is_whitespace() { ' ' } else { c })
671 .collect()
672}
673#[allow(dead_code)]
675pub fn strip_control_chars(s: &str) -> String {
676 s.chars().filter(|c| !c.is_control()).collect()
677}
678#[allow(dead_code)]
685pub fn normalize_to_nfc_approx(s: &str) -> String {
686 let chars: Vec<char> = s.chars().collect();
687 let mut result = String::with_capacity(s.len());
688 let mut i = 0;
689 while i < chars.len() {
690 let base = chars[i];
691 if i + 1 < chars.len() {
692 if let Some(composed) = compose_pair(base, chars[i + 1]) {
693 result.push(composed);
694 i += 2;
695 continue;
696 }
697 }
698 result.push(base);
699 i += 1;
700 }
701 result
702}
703#[allow(dead_code)]
707pub fn compose_pair(base: char, combining: char) -> Option<char> {
708 match (base, combining) {
709 ('A', '\u{0300}') => Some('À'),
710 ('E', '\u{0300}') => Some('È'),
711 ('I', '\u{0300}') => Some('Ì'),
712 ('O', '\u{0300}') => Some('Ò'),
713 ('U', '\u{0300}') => Some('Ù'),
714 ('W', '\u{0300}') => Some('Ẁ'),
715 ('Y', '\u{0300}') => Some('Ỳ'),
716 ('N', '\u{0300}') => Some('Ǹ'),
717 ('a', '\u{0300}') => Some('à'),
718 ('e', '\u{0300}') => Some('è'),
719 ('i', '\u{0300}') => Some('ì'),
720 ('o', '\u{0300}') => Some('ò'),
721 ('u', '\u{0300}') => Some('ù'),
722 ('w', '\u{0300}') => Some('ẁ'),
723 ('y', '\u{0300}') => Some('ỳ'),
724 ('n', '\u{0300}') => Some('ǹ'),
725 ('A', '\u{0301}') => Some('Á'),
726 ('C', '\u{0301}') => Some('Ć'),
727 ('E', '\u{0301}') => Some('É'),
728 ('G', '\u{0301}') => Some('Ǵ'),
729 ('I', '\u{0301}') => Some('Í'),
730 ('K', '\u{0301}') => Some('Ḱ'),
731 ('L', '\u{0301}') => Some('Ĺ'),
732 ('M', '\u{0301}') => Some('Ḿ'),
733 ('N', '\u{0301}') => Some('Ń'),
734 ('O', '\u{0301}') => Some('Ó'),
735 ('P', '\u{0301}') => Some('Ṕ'),
736 ('R', '\u{0301}') => Some('Ŕ'),
737 ('S', '\u{0301}') => Some('Ś'),
738 ('U', '\u{0301}') => Some('Ú'),
739 ('W', '\u{0301}') => Some('Ẃ'),
740 ('Y', '\u{0301}') => Some('Ý'),
741 ('Z', '\u{0301}') => Some('Ź'),
742 ('a', '\u{0301}') => Some('á'),
743 ('c', '\u{0301}') => Some('ć'),
744 ('e', '\u{0301}') => Some('é'),
745 ('g', '\u{0301}') => Some('ǵ'),
746 ('i', '\u{0301}') => Some('í'),
747 ('k', '\u{0301}') => Some('ḱ'),
748 ('l', '\u{0301}') => Some('ĺ'),
749 ('m', '\u{0301}') => Some('ḿ'),
750 ('n', '\u{0301}') => Some('ń'),
751 ('o', '\u{0301}') => Some('ó'),
752 ('p', '\u{0301}') => Some('ṕ'),
753 ('r', '\u{0301}') => Some('ŕ'),
754 ('s', '\u{0301}') => Some('ś'),
755 ('u', '\u{0301}') => Some('ú'),
756 ('w', '\u{0301}') => Some('ẃ'),
757 ('y', '\u{0301}') => Some('ý'),
758 ('z', '\u{0301}') => Some('ź'),
759 ('A', '\u{0302}') => Some('Â'),
760 ('C', '\u{0302}') => Some('Ĉ'),
761 ('E', '\u{0302}') => Some('Ê'),
762 ('G', '\u{0302}') => Some('Ĝ'),
763 ('H', '\u{0302}') => Some('Ĥ'),
764 ('I', '\u{0302}') => Some('Î'),
765 ('J', '\u{0302}') => Some('Ĵ'),
766 ('O', '\u{0302}') => Some('Ô'),
767 ('S', '\u{0302}') => Some('Ŝ'),
768 ('U', '\u{0302}') => Some('Û'),
769 ('W', '\u{0302}') => Some('Ŵ'),
770 ('Y', '\u{0302}') => Some('Ŷ'),
771 ('Z', '\u{0302}') => Some('Ẑ'),
772 ('a', '\u{0302}') => Some('â'),
773 ('c', '\u{0302}') => Some('ĉ'),
774 ('e', '\u{0302}') => Some('ê'),
775 ('g', '\u{0302}') => Some('ĝ'),
776 ('h', '\u{0302}') => Some('ĥ'),
777 ('i', '\u{0302}') => Some('î'),
778 ('j', '\u{0302}') => Some('ĵ'),
779 ('o', '\u{0302}') => Some('ô'),
780 ('s', '\u{0302}') => Some('ŝ'),
781 ('u', '\u{0302}') => Some('û'),
782 ('w', '\u{0302}') => Some('ŵ'),
783 ('y', '\u{0302}') => Some('ŷ'),
784 ('z', '\u{0302}') => Some('ẑ'),
785 ('A', '\u{0303}') => Some('Ã'),
786 ('E', '\u{0303}') => Some('Ẽ'),
787 ('I', '\u{0303}') => Some('Ĩ'),
788 ('N', '\u{0303}') => Some('Ñ'),
789 ('O', '\u{0303}') => Some('Õ'),
790 ('U', '\u{0303}') => Some('Ũ'),
791 ('V', '\u{0303}') => Some('Ṽ'),
792 ('Y', '\u{0303}') => Some('Ỹ'),
793 ('a', '\u{0303}') => Some('ã'),
794 ('e', '\u{0303}') => Some('ẽ'),
795 ('i', '\u{0303}') => Some('ĩ'),
796 ('n', '\u{0303}') => Some('ñ'),
797 ('o', '\u{0303}') => Some('õ'),
798 ('u', '\u{0303}') => Some('ũ'),
799 ('v', '\u{0303}') => Some('ṽ'),
800 ('y', '\u{0303}') => Some('ỹ'),
801 ('A', '\u{0304}') => Some('Ā'),
802 ('E', '\u{0304}') => Some('Ē'),
803 ('I', '\u{0304}') => Some('Ī'),
804 ('O', '\u{0304}') => Some('Ō'),
805 ('U', '\u{0304}') => Some('Ū'),
806 ('G', '\u{0304}') => Some('Ḡ'),
807 ('Y', '\u{0304}') => Some('Ȳ'),
808 ('a', '\u{0304}') => Some('ā'),
809 ('e', '\u{0304}') => Some('ē'),
810 ('i', '\u{0304}') => Some('ī'),
811 ('o', '\u{0304}') => Some('ō'),
812 ('u', '\u{0304}') => Some('ū'),
813 ('g', '\u{0304}') => Some('ḡ'),
814 ('y', '\u{0304}') => Some('ȳ'),
815 ('A', '\u{0306}') => Some('Ă'),
816 ('E', '\u{0306}') => Some('Ĕ'),
817 ('G', '\u{0306}') => Some('Ğ'),
818 ('I', '\u{0306}') => Some('Ĭ'),
819 ('O', '\u{0306}') => Some('Ŏ'),
820 ('U', '\u{0306}') => Some('Ŭ'),
821 ('a', '\u{0306}') => Some('ă'),
822 ('e', '\u{0306}') => Some('ĕ'),
823 ('g', '\u{0306}') => Some('ğ'),
824 ('i', '\u{0306}') => Some('ĭ'),
825 ('o', '\u{0306}') => Some('ŏ'),
826 ('u', '\u{0306}') => Some('ŭ'),
827 ('B', '\u{0307}') => Some('Ḃ'),
828 ('C', '\u{0307}') => Some('Ċ'),
829 ('D', '\u{0307}') => Some('Ḋ'),
830 ('E', '\u{0307}') => Some('Ė'),
831 ('F', '\u{0307}') => Some('Ḟ'),
832 ('G', '\u{0307}') => Some('Ġ'),
833 ('H', '\u{0307}') => Some('Ḣ'),
834 ('I', '\u{0307}') => Some('İ'),
835 ('M', '\u{0307}') => Some('Ṁ'),
836 ('N', '\u{0307}') => Some('Ṅ'),
837 ('P', '\u{0307}') => Some('Ṗ'),
838 ('R', '\u{0307}') => Some('Ṙ'),
839 ('S', '\u{0307}') => Some('Ṡ'),
840 ('T', '\u{0307}') => Some('Ṫ'),
841 ('W', '\u{0307}') => Some('Ẇ'),
842 ('X', '\u{0307}') => Some('Ẋ'),
843 ('Y', '\u{0307}') => Some('Ẏ'),
844 ('Z', '\u{0307}') => Some('Ż'),
845 ('b', '\u{0307}') => Some('ḃ'),
846 ('c', '\u{0307}') => Some('ċ'),
847 ('d', '\u{0307}') => Some('ḋ'),
848 ('e', '\u{0307}') => Some('ė'),
849 ('f', '\u{0307}') => Some('ḟ'),
850 ('g', '\u{0307}') => Some('ġ'),
851 ('h', '\u{0307}') => Some('ḣ'),
852 ('m', '\u{0307}') => Some('ṁ'),
853 ('n', '\u{0307}') => Some('ṅ'),
854 ('p', '\u{0307}') => Some('ṗ'),
855 ('r', '\u{0307}') => Some('ṙ'),
856 ('s', '\u{0307}') => Some('ṡ'),
857 ('t', '\u{0307}') => Some('ṫ'),
858 ('w', '\u{0307}') => Some('ẇ'),
859 ('x', '\u{0307}') => Some('ẋ'),
860 ('y', '\u{0307}') => Some('ẏ'),
861 ('z', '\u{0307}') => Some('ż'),
862 ('A', '\u{0308}') => Some('Ä'),
863 ('E', '\u{0308}') => Some('Ë'),
864 ('H', '\u{0308}') => Some('Ḧ'),
865 ('I', '\u{0308}') => Some('Ï'),
866 ('O', '\u{0308}') => Some('Ö'),
867 ('U', '\u{0308}') => Some('Ü'),
868 ('W', '\u{0308}') => Some('Ẅ'),
869 ('X', '\u{0308}') => Some('Ẍ'),
870 ('Y', '\u{0308}') => Some('Ÿ'),
871 ('a', '\u{0308}') => Some('ä'),
872 ('e', '\u{0308}') => Some('ë'),
873 ('h', '\u{0308}') => Some('ḧ'),
874 ('i', '\u{0308}') => Some('ï'),
875 ('o', '\u{0308}') => Some('ö'),
876 ('t', '\u{0308}') => Some('ẗ'),
877 ('u', '\u{0308}') => Some('ü'),
878 ('w', '\u{0308}') => Some('ẅ'),
879 ('x', '\u{0308}') => Some('ẍ'),
880 ('y', '\u{0308}') => Some('ÿ'),
881 ('A', '\u{030A}') => Some('Å'),
882 ('U', '\u{030A}') => Some('Ů'),
883 ('a', '\u{030A}') => Some('å'),
884 ('u', '\u{030A}') => Some('ů'),
885 ('w', '\u{030A}') => Some('ẘ'),
886 ('y', '\u{030A}') => Some('ẙ'),
887 ('O', '\u{030B}') => Some('Ő'),
888 ('U', '\u{030B}') => Some('Ű'),
889 ('o', '\u{030B}') => Some('ő'),
890 ('u', '\u{030B}') => Some('ű'),
891 ('A', '\u{030C}') => Some('Ǎ'),
892 ('C', '\u{030C}') => Some('Č'),
893 ('D', '\u{030C}') => Some('Ď'),
894 ('E', '\u{030C}') => Some('Ě'),
895 ('G', '\u{030C}') => Some('Ǧ'),
896 ('H', '\u{030C}') => Some('Ȟ'),
897 ('I', '\u{030C}') => Some('Ǐ'),
898 ('K', '\u{030C}') => Some('Ǩ'),
899 ('L', '\u{030C}') => Some('Ľ'),
900 ('N', '\u{030C}') => Some('Ň'),
901 ('O', '\u{030C}') => Some('Ǒ'),
902 ('R', '\u{030C}') => Some('Ř'),
903 ('S', '\u{030C}') => Some('Š'),
904 ('T', '\u{030C}') => Some('Ť'),
905 ('U', '\u{030C}') => Some('Ǔ'),
906 ('Z', '\u{030C}') => Some('Ž'),
907 ('a', '\u{030C}') => Some('ǎ'),
908 ('c', '\u{030C}') => Some('č'),
909 ('d', '\u{030C}') => Some('ď'),
910 ('e', '\u{030C}') => Some('ě'),
911 ('g', '\u{030C}') => Some('ǧ'),
912 ('h', '\u{030C}') => Some('ȟ'),
913 ('i', '\u{030C}') => Some('ǐ'),
914 ('j', '\u{030C}') => Some('ǰ'),
915 ('k', '\u{030C}') => Some('ǩ'),
916 ('l', '\u{030C}') => Some('ľ'),
917 ('n', '\u{030C}') => Some('ň'),
918 ('o', '\u{030C}') => Some('ǒ'),
919 ('r', '\u{030C}') => Some('ř'),
920 ('s', '\u{030C}') => Some('š'),
921 ('t', '\u{030C}') => Some('ť'),
922 ('u', '\u{030C}') => Some('ǔ'),
923 ('z', '\u{030C}') => Some('ž'),
924 ('C', '\u{0327}') => Some('Ç'),
925 ('D', '\u{0327}') => Some('Ḑ'),
926 ('E', '\u{0327}') => Some('Ȩ'),
927 ('G', '\u{0327}') => Some('Ģ'),
928 ('H', '\u{0327}') => Some('Ḩ'),
929 ('K', '\u{0327}') => Some('Ķ'),
930 ('L', '\u{0327}') => Some('Ļ'),
931 ('N', '\u{0327}') => Some('Ņ'),
932 ('R', '\u{0327}') => Some('Ŗ'),
933 ('S', '\u{0327}') => Some('Ş'),
934 ('T', '\u{0327}') => Some('Ţ'),
935 ('c', '\u{0327}') => Some('ç'),
936 ('d', '\u{0327}') => Some('ḑ'),
937 ('e', '\u{0327}') => Some('ȩ'),
938 ('g', '\u{0327}') => Some('ģ'),
939 ('h', '\u{0327}') => Some('ḩ'),
940 ('k', '\u{0327}') => Some('ķ'),
941 ('l', '\u{0327}') => Some('ļ'),
942 ('n', '\u{0327}') => Some('ņ'),
943 ('r', '\u{0327}') => Some('ŗ'),
944 ('s', '\u{0327}') => Some('ş'),
945 ('t', '\u{0327}') => Some('ţ'),
946 ('A', '\u{0328}') => Some('Ą'),
947 ('E', '\u{0328}') => Some('Ę'),
948 ('I', '\u{0328}') => Some('Į'),
949 ('O', '\u{0328}') => Some('Ǫ'),
950 ('U', '\u{0328}') => Some('Ų'),
951 ('a', '\u{0328}') => Some('ą'),
952 ('e', '\u{0328}') => Some('ę'),
953 ('i', '\u{0328}') => Some('į'),
954 ('o', '\u{0328}') => Some('ǫ'),
955 ('u', '\u{0328}') => Some('ų'),
956 _ => None,
957 }
958}
959#[allow(dead_code)]
961pub fn unique_chars(s: &str) -> Vec<char> {
962 let mut seen = Vec::new();
963 for c in s.chars() {
964 if !seen.contains(&c) {
965 seen.push(c);
966 }
967 }
968 seen
969}
970#[allow(dead_code)]
972pub fn char_frequency(s: &str) -> std::collections::HashMap<char, usize> {
973 let mut map = std::collections::HashMap::new();
974 for c in s.chars() {
975 *map.entry(c).or_insert(0) += 1;
976 }
977 map
978}
979#[allow(dead_code)]
981pub fn is_all_ascii(s: &str) -> bool {
982 s.is_ascii()
983}
984#[allow(dead_code)]
986pub fn reverse_str(s: &str) -> String {
987 s.chars().rev().collect()
988}
989#[cfg(test)]
990mod extra_char_tests {
991 use super::*;
992 #[test]
993 fn test_char_range_contains() {
994 let r = CharRange::new(65, 90);
995 assert!(r.contains(65));
996 assert!(r.contains(90));
997 assert!(!r.contains(91));
998 }
999 #[test]
1000 fn test_char_range_size() {
1001 let r = CharRange::new(65, 90);
1002 assert_eq!(r.size(), 26);
1003 }
1004 #[test]
1005 fn test_char_range_chars() {
1006 let r = CharRange::new(65, 67);
1007 let chars: Vec<char> = r.chars().collect();
1008 assert_eq!(chars, vec!['A', 'B', 'C']);
1009 }
1010 #[test]
1011 fn test_unicode_blocks_basic_latin() {
1012 assert!(UnicodeBlocks::BASIC_LATIN.contains(b'A' as u32));
1013 assert!(!UnicodeBlocks::BASIC_LATIN.contains(0x100));
1014 }
1015 #[test]
1016 fn test_unicode_blocks_is_math_operator() {
1017 assert!(UnicodeBlocks::is_math_operator(0x2200));
1018 assert!(!UnicodeBlocks::is_math_operator(b'A' as u32));
1019 }
1020 #[test]
1021 fn test_unicode_blocks_is_greek() {
1022 assert!(UnicodeBlocks::is_greek(0x03B1));
1023 assert!(!UnicodeBlocks::is_greek(b'a' as u32));
1024 }
1025 #[test]
1026 fn test_unicode_blocks_is_arrow() {
1027 assert!(UnicodeBlocks::is_arrow(0x2192));
1028 }
1029 #[test]
1030 fn test_char_scanner_peek() {
1031 let scanner = CharScanner::new("abc");
1032 assert_eq!(scanner.peek(), Some('a'));
1033 }
1034 #[test]
1035 fn test_char_scanner_advance() {
1036 let mut scanner = CharScanner::new("abc");
1037 assert_eq!(scanner.advance(), Some('a'));
1038 assert_eq!(scanner.advance(), Some('b'));
1039 assert_eq!(scanner.peek(), Some('c'));
1040 }
1041 #[test]
1042 fn test_char_scanner_eat_success() {
1043 let mut scanner = CharScanner::new("abc");
1044 assert!(scanner.eat('a'));
1045 assert_eq!(scanner.peek(), Some('b'));
1046 }
1047 #[test]
1048 fn test_char_scanner_eat_fail() {
1049 let mut scanner = CharScanner::new("abc");
1050 assert!(!scanner.eat('z'));
1051 assert_eq!(scanner.peek(), Some('a'));
1052 }
1053 #[test]
1054 fn test_char_scanner_take_while_digits() {
1055 let mut scanner = CharScanner::new("123abc");
1056 let digits = scanner.take_while(|c| c.is_ascii_digit());
1057 assert_eq!(digits, "123");
1058 assert_eq!(scanner.peek(), Some('a'));
1059 }
1060 #[test]
1061 fn test_char_scanner_is_eof() {
1062 let mut scanner = CharScanner::new("x");
1063 assert!(!scanner.is_eof());
1064 scanner.advance();
1065 assert!(scanner.is_eof());
1066 }
1067 #[test]
1068 fn test_char_scanner_remaining() {
1069 let scanner = CharScanner::new("hello");
1070 assert_eq!(scanner.remaining(), 5);
1071 }
1072 #[test]
1073 fn test_normalize_whitespace() {
1074 let s = "hello\tworld\n";
1075 let norm = normalize_whitespace(s);
1076 assert!(!norm.contains('\t'));
1077 assert!(!norm.contains('\n'));
1078 }
1079 #[test]
1080 fn test_strip_control_chars() {
1081 let s = "hello\x00world\x1b";
1082 let clean = strip_control_chars(s);
1083 assert_eq!(clean, "helloworld");
1084 }
1085 #[test]
1086 fn test_unique_chars() {
1087 let chars = unique_chars("abcabc");
1088 assert_eq!(chars, vec!['a', 'b', 'c']);
1089 }
1090 #[test]
1091 fn test_char_frequency() {
1092 let freq = char_frequency("aabbbc");
1093 assert_eq!(freq[&'a'], 2);
1094 assert_eq!(freq[&'b'], 3);
1095 assert_eq!(freq[&'c'], 1);
1096 }
1097 #[test]
1098 fn test_is_all_ascii() {
1099 assert!(is_all_ascii("hello"));
1100 assert!(!is_all_ascii("héllo"));
1101 }
1102 #[test]
1103 fn test_reverse_str() {
1104 assert_eq!(reverse_str("hello"), "olleh");
1105 assert_eq!(reverse_str(""), "");
1106 }
1107 #[test]
1108 fn test_char_scanner_consumed() {
1109 let mut scanner = CharScanner::new("abc");
1110 scanner.advance();
1111 scanner.advance();
1112 assert_eq!(scanner.consumed(), "ab");
1113 }
1114 #[test]
1115 fn test_char_scanner_peek_at_offset() {
1116 let scanner = CharScanner::new("xyz");
1117 assert_eq!(scanner.peek_at(0), Some('x'));
1118 assert_eq!(scanner.peek_at(2), Some('z'));
1119 assert_eq!(scanner.peek_at(3), None);
1120 }
1121}
1122pub fn ch_ext_app(f: Expr, a: Expr) -> Expr {
1123 Expr::App(Box::new(f), Box::new(a))
1124}
1125pub fn ch_ext_app2(f: Expr, a: Expr, b: Expr) -> Expr {
1126 ch_ext_app(ch_ext_app(f, a), b)
1127}
1128pub fn ch_ext_cst(s: &str) -> Expr {
1129 Expr::Const(Name::str(s), vec![])
1130}
1131pub fn ch_ext_prop() -> Expr {
1132 Expr::Sort(Level::zero())
1133}
1134pub fn ch_ext_type0() -> Expr {
1135 Expr::Sort(Level::succ(Level::zero()))
1136}
1137pub fn ch_ext_bvar(n: u32) -> Expr {
1138 Expr::BVar(n)
1139}
1140pub fn ch_ext_nat_ty() -> Expr {
1141 ch_ext_cst("Nat")
1142}
1143pub fn ch_ext_char_ty() -> Expr {
1144 ch_ext_cst("Char")
1145}
1146pub fn ch_ext_bool_ty() -> Expr {
1147 ch_ext_cst("Bool")
1148}
1149pub fn ch_ext_string_ty() -> Expr {
1150 ch_ext_cst("String")
1151}
1152pub fn ch_ext_list_ty(elem: Expr) -> Expr {
1153 ch_ext_app(ch_ext_cst("List"), elem)
1154}
1155pub fn ch_ext_option_ty(inner: Expr) -> Expr {
1156 ch_ext_app(ch_ext_cst("Option"), inner)
1157}
1158pub fn ch_ext_prod_ty(a: Expr, b: Expr) -> Expr {
1159 ch_ext_app2(ch_ext_cst("Prod"), a, b)
1160}
1161pub fn ch_ext_arrow(dom: Expr, cod: Expr) -> Expr {
1162 Expr::Pi(
1163 BinderInfo::Default,
1164 Name::Anonymous,
1165 Box::new(dom),
1166 Box::new(cod),
1167 )
1168}
1169pub fn ch_ext_pi(name: &str, dom: Expr, cod: Expr) -> Expr {
1170 Expr::Pi(
1171 BinderInfo::Default,
1172 Name::str(name),
1173 Box::new(dom),
1174 Box::new(cod),
1175 )
1176}
1177pub fn ch_ext_impl_pi(name: &str, dom: Expr, cod: Expr) -> Expr {
1178 Expr::Pi(
1179 BinderInfo::Implicit,
1180 Name::str(name),
1181 Box::new(dom),
1182 Box::new(cod),
1183 )
1184}
1185pub fn char_is_valid_scalar_ty() -> Expr {
1187 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1188}
1189pub fn char_unicode_max_ty() -> Expr {
1191 ch_ext_nat_ty()
1192}
1193pub fn char_codepoint_lt_max_ty() -> Expr {
1195 ch_ext_pi(
1196 "c",
1197 ch_ext_char_ty(),
1198 ch_ext_app2(
1199 ch_ext_cst("Nat.lt"),
1200 ch_ext_app(ch_ext_cst("Char.toNat"), ch_ext_bvar(0)),
1201 ch_ext_cst("Char.unicodeMax"),
1202 ),
1203 )
1204}
1205pub fn char_iso_nat_ty() -> Expr {
1207 ch_ext_impl_pi(
1208 "n",
1209 ch_ext_nat_ty(),
1210 ch_ext_pi(
1211 "h",
1212 ch_ext_app2(
1213 ch_ext_cst("Nat.lt"),
1214 ch_ext_bvar(0),
1215 ch_ext_cst("Char.unicodeMax"),
1216 ),
1217 ch_ext_app2(
1218 ch_ext_cst("Eq"),
1219 ch_ext_app(
1220 ch_ext_cst("Char.toNat"),
1221 ch_ext_app(ch_ext_cst("Char.ofNat"), ch_ext_bvar(1)),
1222 ),
1223 ch_ext_bvar(1),
1224 ),
1225 ),
1226 )
1227}
1228pub fn char_of_nat_to_nat_ty() -> Expr {
1230 ch_ext_pi(
1231 "c",
1232 ch_ext_char_ty(),
1233 ch_ext_app2(
1234 ch_ext_cst("Eq"),
1235 ch_ext_app(
1236 ch_ext_cst("Char.ofNat"),
1237 ch_ext_app(ch_ext_cst("Char.toNat"), ch_ext_bvar(0)),
1238 ),
1239 ch_ext_bvar(0),
1240 ),
1241 )
1242}
1243pub fn char_to_uint32_ty() -> Expr {
1245 ch_ext_arrow(ch_ext_char_ty(), ch_ext_cst("UInt32"))
1246}
1247pub fn char_of_uint32_ty() -> Expr {
1249 ch_ext_arrow(ch_ext_cst("UInt32"), ch_ext_option_ty(ch_ext_char_ty()))
1250}
1251pub fn char_to_uint32_injective_ty() -> Expr {
1253 ch_ext_pi(
1254 "a",
1255 ch_ext_char_ty(),
1256 ch_ext_pi(
1257 "b",
1258 ch_ext_char_ty(),
1259 ch_ext_arrow(
1260 ch_ext_app2(
1261 ch_ext_cst("Eq"),
1262 ch_ext_app(ch_ext_cst("Char.toUInt32"), ch_ext_bvar(1)),
1263 ch_ext_app(ch_ext_cst("Char.toUInt32"), ch_ext_bvar(0)),
1264 ),
1265 ch_ext_app2(ch_ext_cst("Eq"), ch_ext_bvar(1), ch_ext_bvar(0)),
1266 ),
1267 ),
1268 )
1269}
1270pub fn char_dec_eq_ty() -> Expr {
1272 ch_ext_pi(
1273 "a",
1274 ch_ext_char_ty(),
1275 ch_ext_pi(
1276 "b",
1277 ch_ext_char_ty(),
1278 ch_ext_app(
1279 ch_ext_cst("Decidable"),
1280 ch_ext_app2(ch_ext_cst("Eq"), ch_ext_bvar(1), ch_ext_bvar(0)),
1281 ),
1282 ),
1283 )
1284}
1285pub fn char_lt_ty() -> Expr {
1287 ch_ext_arrow(
1288 ch_ext_char_ty(),
1289 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop()),
1290 )
1291}
1292pub fn char_le_ty() -> Expr {
1294 ch_ext_arrow(
1295 ch_ext_char_ty(),
1296 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop()),
1297 )
1298}
1299pub fn char_lt_irrefl_ty() -> Expr {
1301 ch_ext_pi(
1302 "c",
1303 ch_ext_char_ty(),
1304 ch_ext_app(
1305 ch_ext_cst("Not"),
1306 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(0), ch_ext_bvar(0)),
1307 ),
1308 )
1309}
1310pub fn char_lt_trans_ty() -> Expr {
1312 ch_ext_pi(
1313 "a",
1314 ch_ext_char_ty(),
1315 ch_ext_pi(
1316 "b",
1317 ch_ext_char_ty(),
1318 ch_ext_pi(
1319 "c",
1320 ch_ext_char_ty(),
1321 ch_ext_arrow(
1322 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(2), ch_ext_bvar(1)),
1323 ch_ext_arrow(
1324 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(1), ch_ext_bvar(0)),
1325 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(2), ch_ext_bvar(0)),
1326 ),
1327 ),
1328 ),
1329 ),
1330 )
1331}
1332pub fn char_lt_total_ty() -> Expr {
1334 ch_ext_pi(
1335 "a",
1336 ch_ext_char_ty(),
1337 ch_ext_pi(
1338 "b",
1339 ch_ext_char_ty(),
1340 ch_ext_app2(
1341 ch_ext_cst("Or"),
1342 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(1), ch_ext_bvar(0)),
1343 ch_ext_app2(
1344 ch_ext_cst("Or"),
1345 ch_ext_app2(ch_ext_cst("Eq"), ch_ext_bvar(1), ch_ext_bvar(0)),
1346 ch_ext_app2(ch_ext_cst("Char.lt"), ch_ext_bvar(0), ch_ext_bvar(1)),
1347 ),
1348 ),
1349 ),
1350 )
1351}
1352pub fn char_is_alpha_prop_ty() -> Expr {
1354 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1355}
1356pub fn char_is_digit_prop_ty() -> Expr {
1358 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1359}
1360pub fn char_is_alphanum_prop_ty() -> Expr {
1362 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1363}
1364pub fn char_is_space_prop_ty() -> Expr {
1366 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1367}
1368pub fn char_is_punct_prop_ty() -> Expr {
1370 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1371}
1372pub fn char_not_surrogate_ty() -> Expr {
1374 ch_ext_pi(
1375 "c",
1376 ch_ext_char_ty(),
1377 ch_ext_app(
1378 ch_ext_cst("Not"),
1379 ch_ext_app2(
1380 ch_ext_cst("And"),
1381 ch_ext_app2(
1382 ch_ext_cst("Nat.le"),
1383 Expr::Lit(oxilean_kernel::Literal::Nat(0xD800_u32.into())),
1384 ch_ext_app(ch_ext_cst("Char.toNat"), ch_ext_bvar(0)),
1385 ),
1386 ch_ext_app2(
1387 ch_ext_cst("Nat.le"),
1388 ch_ext_app(ch_ext_cst("Char.toNat"), ch_ext_bvar(0)),
1389 Expr::Lit(oxilean_kernel::Literal::Nat(0xDFFF_u32.into())),
1390 ),
1391 ),
1392 ),
1393 )
1394}
1395pub fn char_succ_ty() -> Expr {
1397 ch_ext_arrow(ch_ext_char_ty(), ch_ext_option_ty(ch_ext_char_ty()))
1398}
1399pub fn char_pred_ty() -> Expr {
1401 ch_ext_arrow(ch_ext_char_ty(), ch_ext_option_ty(ch_ext_char_ty()))
1402}
1403pub fn char_succ_pred_ty() -> Expr {
1405 ch_ext_pi("c", ch_ext_char_ty(), ch_ext_prop())
1406}
1407pub fn char_to_string_ty() -> Expr {
1409 ch_ext_arrow(ch_ext_char_ty(), ch_ext_string_ty())
1410}
1411pub fn char_to_list_ty() -> Expr {
1413 ch_ext_arrow(ch_ext_char_ty(), ch_ext_list_ty(ch_ext_char_ty()))
1414}
1415pub fn char_is_prefix_ty() -> Expr {
1417 ch_ext_arrow(
1418 ch_ext_char_ty(),
1419 ch_ext_arrow(ch_ext_string_ty(), ch_ext_bool_ty()),
1420 )
1421}
1422pub fn char_lex_lt_ty() -> Expr {
1424 ch_ext_arrow(
1425 ch_ext_char_ty(),
1426 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop()),
1427 )
1428}
1429pub fn char_unicode_category_ty() -> Expr {
1431 ch_ext_type0()
1432}
1433pub fn char_general_category_fn_ty() -> Expr {
1435 ch_ext_arrow(ch_ext_char_ty(), ch_ext_cst("Char.UnicodeCategory"))
1436}
1437pub fn char_is_letter_ty() -> Expr {
1439 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1440}
1441pub fn char_is_number_ty() -> Expr {
1443 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1444}
1445pub fn char_is_symbol_ty() -> Expr {
1447 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1448}
1449pub fn char_is_mark_ty() -> Expr {
1451 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1452}
1453pub fn char_utf8_width_ty() -> Expr {
1455 ch_ext_arrow(ch_ext_char_ty(), ch_ext_nat_ty())
1456}
1457pub fn char_utf8_width_range_ty() -> Expr {
1459 ch_ext_pi(
1460 "c",
1461 ch_ext_char_ty(),
1462 ch_ext_app2(
1463 ch_ext_cst("And"),
1464 ch_ext_app2(
1465 ch_ext_cst("Nat.le"),
1466 Expr::Lit(oxilean_kernel::Literal::Nat(1_u32.into())),
1467 ch_ext_app(ch_ext_cst("Char.utf8Width"), ch_ext_bvar(0)),
1468 ),
1469 ch_ext_app2(
1470 ch_ext_cst("Nat.le"),
1471 ch_ext_app(ch_ext_cst("Char.utf8Width"), ch_ext_bvar(0)),
1472 Expr::Lit(oxilean_kernel::Literal::Nat(4_u32.into())),
1473 ),
1474 ),
1475 )
1476}
1477pub fn char_utf8_bytes_ty() -> Expr {
1479 ch_ext_arrow(ch_ext_char_ty(), ch_ext_list_ty(ch_ext_nat_ty()))
1480}
1481pub fn char_nfc_normalize_ty() -> Expr {
1483 ch_ext_arrow(ch_ext_char_ty(), ch_ext_option_ty(ch_ext_char_ty()))
1484}
1485pub fn char_nfd_decompose_ty() -> Expr {
1487 ch_ext_arrow(ch_ext_char_ty(), ch_ext_list_ty(ch_ext_char_ty()))
1488}
1489pub fn char_nfkc_normalize_ty() -> Expr {
1491 ch_ext_arrow(ch_ext_char_ty(), ch_ext_option_ty(ch_ext_char_ty()))
1492}
1493pub fn char_nfkd_decompose_ty() -> Expr {
1495 ch_ext_arrow(ch_ext_char_ty(), ch_ext_list_ty(ch_ext_char_ty()))
1496}
1497pub fn char_bidi_category_ty() -> Expr {
1499 ch_ext_type0()
1500}
1501pub fn char_bidi_category_fn_ty() -> Expr {
1503 ch_ext_arrow(ch_ext_char_ty(), ch_ext_cst("Char.BidiCategory"))
1504}
1505pub fn char_is_ltr_ty() -> Expr {
1507 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1508}
1509pub fn char_is_rtl_ty() -> Expr {
1511 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1512}
1513pub fn char_case_fold_ty() -> Expr {
1515 ch_ext_arrow(ch_ext_char_ty(), ch_ext_char_ty())
1516}
1517pub fn char_case_fold_idempotent_ty() -> Expr {
1519 ch_ext_pi(
1520 "c",
1521 ch_ext_char_ty(),
1522 ch_ext_app2(
1523 ch_ext_cst("Eq"),
1524 ch_ext_app(
1525 ch_ext_cst("Char.caseFold"),
1526 ch_ext_app(ch_ext_cst("Char.caseFold"), ch_ext_bvar(0)),
1527 ),
1528 ch_ext_app(ch_ext_cst("Char.caseFold"), ch_ext_bvar(0)),
1529 ),
1530 )
1531}
1532pub fn char_collation_key_ty() -> Expr {
1534 ch_ext_type0()
1535}
1536pub fn char_collation_key_fn_ty() -> Expr {
1538 ch_ext_arrow(ch_ext_char_ty(), ch_ext_cst("Char.CollationKey"))
1539}
1540pub fn char_collation_le_ty() -> Expr {
1542 ch_ext_arrow(
1543 ch_ext_cst("Char.CollationKey"),
1544 ch_ext_arrow(ch_ext_cst("Char.CollationKey"), ch_ext_prop()),
1545 )
1546}
1547pub fn char_regex_class_ty() -> Expr {
1549 ch_ext_type0()
1550}
1551pub fn char_matches_class_ty() -> Expr {
1553 ch_ext_arrow(
1554 ch_ext_char_ty(),
1555 ch_ext_arrow(ch_ext_cst("Char.RegexClass"), ch_ext_bool_ty()),
1556 )
1557}
1558pub fn char_is_combining_ty() -> Expr {
1560 ch_ext_arrow(ch_ext_char_ty(), ch_ext_bool_ty())
1561}
1562pub fn char_grapheme_break_prop_ty() -> Expr {
1564 ch_ext_arrow(ch_ext_char_ty(), ch_ext_nat_ty())
1565}
1566pub fn char_is_terminal_ty() -> Expr {
1568 ch_ext_arrow(ch_ext_char_ty(), ch_ext_prop())
1569}
1570pub fn char_terminal_alphabet_ty() -> Expr {
1572 ch_ext_type0()
1573}
1574pub fn char_in_alphabet_ty() -> Expr {
1576 ch_ext_arrow(
1577 ch_ext_char_ty(),
1578 ch_ext_arrow(ch_ext_cst("Char.TerminalAlphabet"), ch_ext_prop()),
1579 )
1580}
1581pub fn char_ascii_subset_ty() -> Expr {
1583 ch_ext_pi(
1584 "c",
1585 ch_ext_char_ty(),
1586 ch_ext_arrow(
1587 ch_ext_app2(
1588 ch_ext_cst("Eq"),
1589 ch_ext_app(ch_ext_cst("Char.isAscii"), ch_ext_bvar(0)),
1590 ch_ext_cst("Bool.true"),
1591 ),
1592 ch_ext_app2(
1593 ch_ext_cst("Nat.lt"),
1594 ch_ext_app(ch_ext_cst("Char.toNat"), ch_ext_bvar(0)),
1595 Expr::Lit(oxilean_kernel::Literal::Nat(128_u32.into())),
1596 ),
1597 ),
1598 )
1599}
1600pub fn char_nat_to_digit_ty() -> Expr {
1602 ch_ext_arrow(ch_ext_nat_ty(), ch_ext_option_ty(ch_ext_char_ty()))
1603}
1604pub fn char_digit_round_trip_ty() -> Expr {
1606 ch_ext_pi(
1607 "n",
1608 ch_ext_nat_ty(),
1609 ch_ext_arrow(
1610 ch_ext_app2(
1611 ch_ext_cst("Nat.lt"),
1612 ch_ext_bvar(0),
1613 Expr::Lit(oxilean_kernel::Literal::Nat(10_u32.into())),
1614 ),
1615 ch_ext_prop(),
1616 ),
1617 )
1618}
1619pub fn char_compose_with_ty() -> Expr {
1621 ch_ext_arrow(
1622 ch_ext_char_ty(),
1623 ch_ext_arrow(ch_ext_char_ty(), ch_ext_option_ty(ch_ext_char_ty())),
1624 )
1625}
1626pub fn char_decompose_first_ty() -> Expr {
1628 ch_ext_arrow(
1629 ch_ext_char_ty(),
1630 ch_ext_option_ty(ch_ext_prod_ty(ch_ext_char_ty(), ch_ext_char_ty())),
1631 )
1632}
1633pub fn register_char_extended_axioms(env: &mut Environment) {
1635 let axioms: &[(&str, fn() -> Expr)] = &[
1636 ("Char.isValidScalar", char_is_valid_scalar_ty),
1637 ("Char.unicodeMax", char_unicode_max_ty),
1638 ("Char.codepoint_lt_max", char_codepoint_lt_max_ty),
1639 ("Char.isoNat", char_iso_nat_ty),
1640 ("Char.ofNat_toNat", char_of_nat_to_nat_ty),
1641 ("Char.toUInt32", char_to_uint32_ty),
1642 ("Char.ofUInt32", char_of_uint32_ty),
1643 ("Char.toUInt32_injective", char_to_uint32_injective_ty),
1644 ("Char.decEq", char_dec_eq_ty),
1645 ("Char.lt", char_lt_ty),
1646 ("Char.le", char_le_ty),
1647 ("Char.lt_irrefl", char_lt_irrefl_ty),
1648 ("Char.lt_trans", char_lt_trans_ty),
1649 ("Char.lt_total", char_lt_total_ty),
1650 ("Char.IsAlpha", char_is_alpha_prop_ty),
1651 ("Char.IsDigit", char_is_digit_prop_ty),
1652 ("Char.IsAlphaNum", char_is_alphanum_prop_ty),
1653 ("Char.IsSpace", char_is_space_prop_ty),
1654 ("Char.IsPunct", char_is_punct_prop_ty),
1655 ("Char.not_surrogate", char_not_surrogate_ty),
1656 ("Char.succ", char_succ_ty),
1657 ("Char.pred", char_pred_ty),
1658 ("Char.succ_pred", char_succ_pred_ty),
1659 ("Char.toString", char_to_string_ty),
1660 ("Char.toList", char_to_list_ty),
1661 ("Char.isPrefix", char_is_prefix_ty),
1662 ("Char.lexLt", char_lex_lt_ty),
1663 ("Char.UnicodeCategory", char_unicode_category_ty),
1664 ("Char.generalCategory", char_general_category_fn_ty),
1665 ("Char.isLetter", char_is_letter_ty),
1666 ("Char.isNumber", char_is_number_ty),
1667 ("Char.isSymbol", char_is_symbol_ty),
1668 ("Char.isMark", char_is_mark_ty),
1669 ("Char.utf8Width", char_utf8_width_ty),
1670 ("Char.utf8Width_range", char_utf8_width_range_ty),
1671 ("Char.utf8Bytes", char_utf8_bytes_ty),
1672 ("Char.nfcNormalize", char_nfc_normalize_ty),
1673 ("Char.nfdDecompose", char_nfd_decompose_ty),
1674 ("Char.nfkcNormalize", char_nfkc_normalize_ty),
1675 ("Char.nfkdDecompose", char_nfkd_decompose_ty),
1676 ("Char.BidiCategory", char_bidi_category_ty),
1677 ("Char.bidiCategory", char_bidi_category_fn_ty),
1678 ("Char.isLTR", char_is_ltr_ty),
1679 ("Char.isRTL", char_is_rtl_ty),
1680 ("Char.caseFold", char_case_fold_ty),
1681 ("Char.caseFold_idempotent", char_case_fold_idempotent_ty),
1682 ("Char.CollationKey", char_collation_key_ty),
1683 ("Char.collationKey", char_collation_key_fn_ty),
1684 ("Char.collationLe", char_collation_le_ty),
1685 ("Char.RegexClass", char_regex_class_ty),
1686 ("Char.matchesClass", char_matches_class_ty),
1687 ("Char.isCombining", char_is_combining_ty),
1688 ("Char.graphemeBreakProp", char_grapheme_break_prop_ty),
1689 ("Char.isTerminal", char_is_terminal_ty),
1690 ("Char.TerminalAlphabet", char_terminal_alphabet_ty),
1691 ("Char.inAlphabet", char_in_alphabet_ty),
1692 ("Char.ascii_subset", char_ascii_subset_ty),
1693 ("Char.natToDigit", char_nat_to_digit_ty),
1694 ("Char.digitToNat_natToDigit", char_digit_round_trip_ty),
1695 ("Char.composeWith", char_compose_with_ty),
1696 ("Char.decomposeFirst", char_decompose_first_ty),
1697 ];
1698 for (name, build) in axioms {
1699 let _ = env.add(Declaration::Axiom {
1700 name: Name::str(*name),
1701 univ_params: vec![],
1702 ty: build(),
1703 });
1704 }
1705}