[−][src]Crate unic
UNIC: Unicode and Internationalization Crates for Rust
The unic
super-crate (this) is a collection of all UNIC components, providing
an easy way of access to all functionalities, when all or many are needed,
instead of importing components one-by-one, and ensuring all components
imported are compatible in algorithms and consistent data-wise.
Major Components
-
char
: Unicode Character utilities. -
ucd
: Unicode Character Database. (UAX#44). -
bidi
: Unicode Bidirectional Algorithm (UAX#9). -
normal
: Unicode Normalization Forms (UAX#15). -
segment
: Unicode Text Segmentation (UAX#29). -
idna
: Unicode IDNA Compatibility Processing (UTS#46).
A Basic Example
use unic::ucd::common::is_alphanumeric; use unic::bidi::BidiInfo; use unic::normal::StrNormalForm; use unic::segment::{GraphemeIndices, Graphemes, WordBoundIndices, WordBounds, Words}; use unic::ucd::normal::compose; use unic::ucd::{is_cased, Age, BidiClass, CharAge, CharBidiClass, StrBidiClass, UnicodeVersion}; #[cfg_attr(rustfmt, rustfmt_skip)] #[test] fn test_sample() { // Age assert_eq!(Age::of('A').unwrap().actual(), UnicodeVersion { major: 1, minor: 1, micro: 0 }); assert_eq!(Age::of('\u{A0000}'), None); assert_eq!( Age::of('\u{10FFFF}').unwrap().actual(), UnicodeVersion { major: 2, minor: 0, micro: 0 } ); if let Some(age) = '🦊'.age() { assert_eq!(age.actual().major, 9); assert_eq!(age.actual().minor, 0); assert_eq!(age.actual().micro, 0); } // Bidi let text = concat![ "א", "ב", "ג", "a", "b", "c", ]; assert!(!text.has_bidi_explicit()); assert!(text.has_rtl()); assert!(text.has_ltr()); assert_eq!(text.chars().nth(0).unwrap().bidi_class(), BidiClass::RightToLeft); assert!(!text.chars().nth(0).unwrap().is_ltr()); assert!(text.chars().nth(0).unwrap().is_rtl()); assert_eq!(text.chars().nth(3).unwrap().bidi_class(), BidiClass::LeftToRight); assert!(text.chars().nth(3).unwrap().is_ltr()); assert!(!text.chars().nth(3).unwrap().is_rtl()); let bidi_info = BidiInfo::new(text, None); assert_eq!(bidi_info.paragraphs.len(), 1); let para = &bidi_info.paragraphs[0]; assert_eq!(para.level.number(), 1); assert_eq!(para.level.is_rtl(), true); let line = para.range.clone(); let display = bidi_info.reorder_line(para, line); assert_eq!( display, concat![ "a", "b", "c", "ג", "ב", "א", ] ); // Case assert_eq!(is_cased('A'), true); assert_eq!(is_cased('א'), false); // Normalization assert_eq!(compose('A', '\u{030A}'), Some('Å')); let s = "ÅΩ"; let c = s.nfc().collect::<String>(); assert_eq!(c, "ÅΩ"); // Segmentation assert_eq!( Graphemes::new("a\u{310}e\u{301}o\u{308}\u{332}").collect::<Vec<&str>>(), &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"] ); assert_eq!( Graphemes::new("a\r\nb🇺🇳🇮🇨").collect::<Vec<&str>>(), &["a", "\r\n", "b", "🇺🇳", "🇮🇨"] ); assert_eq!( GraphemeIndices::new("a̐éö̲\r\n").collect::<Vec<(usize, &str)>>(), &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")] ); assert_eq!( Words::new( "The quick (\"brown\") fox can't jump 32.3 feet, right?", |s: &&str| s.chars().any(is_alphanumeric), ).collect::<Vec<&str>>(), &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"] ); assert_eq!( WordBounds::new("The quick (\"brown\") fox").collect::<Vec<&str>>(), &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"] ); assert_eq!( WordBoundIndices::new("Brr, it's 29.3°F!").collect::<Vec<(usize, &str)>>(), &[ (0, "Brr"), (3, ","), (4, " "), (5, "it's"), (9, " "), (10, "29.3"), (14, "°"), (16, "F"), (17, "!") ] ); }
Re-exports
pub use unic_bidi as bidi; |
pub use unic_char as char; |
pub use unic_emoji as emoji; |
pub use unic_idna as idna; |
pub use unic_normal as normal; |
pub use unic_segment as segment; |
pub use unic_ucd as ucd; |
Constants
PKG_DESCRIPTION | UNIC component description. |
PKG_NAME | UNIC component name. |
PKG_VERSION | UNIC component version. |
UNICODE_VERSION | The Version of The Unicode Standard of the Unicode Character Database in use. |