unic/lib.rs
1// Copyright 2017 The UNIC Project Developers.
2//
3// See the COPYRIGHT file at the top-level directory of this distribution.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11#![warn(
12 bad_style,
13 missing_debug_implementations,
14 missing_docs,
15 unconditional_recursion
16)]
17#![forbid(unsafe_code)]
18
19//! # UNIC: Unicode and Internationalization Crates for Rust
20//!
21//! The `unic` super-crate (this crate) is a collection of all UNIC components. It
22//! provides easy access to all functionality when many or all components are
23//! needed, instead of importing them one by one, and ensures that all imported
24//! components are compatible in algorithms and consistent in data.
25//!
26//! ## Major Components
27//!
28//! - [`char`](/unic-char): Unicode Character utilities.
29//!
30//! - [`ucd`](/unic-ucd): Unicode Character Database (UAX\#44).
31//!
32//! - [`bidi`](/unic-bidi): Unicode Bidirectional Algorithm (UAX\#9).
33//!
34//! - [`normal`](/unic-normal): Unicode Normalization Forms (UAX\#15).
35//!
36//! - [`segment`](/unic-segment): Unicode Text Segmentation (UAX\#29).
37//!
38//! - [`idna`](/unic-idna): Unicode IDNA Compatibility Processing (UTS\#46).
39//! - [`emoji`](/unic-emoji): Unicode Emoji (UTS\#51).
40//!
41//! ## A Basic Example
42//!
43//! ```rust
44//! use unic::ucd::common::is_alphanumeric;
45//! use unic::bidi::BidiInfo;
46//! use unic::normal::StrNormalForm;
47//! use unic::segment::{GraphemeIndices, Graphemes, WordBoundIndices, WordBounds, Words};
48//! use unic::ucd::normal::compose;
49//! use unic::ucd::{is_cased, Age, BidiClass, CharAge, CharBidiClass, StrBidiClass, UnicodeVersion};
50//!
51//! #[cfg_attr(rustfmt, rustfmt_skip)]
52//! #[test]
53//! fn test_sample() {
54//!
55//! // Age
56//!
57//! assert_eq!(Age::of('A').unwrap().actual(), UnicodeVersion { major: 1, minor: 1, micro: 0 });
58//! assert_eq!(Age::of('\u{A0000}'), None);
59//! assert_eq!(
60//! Age::of('\u{10FFFF}').unwrap().actual(),
61//! UnicodeVersion { major: 2, minor: 0, micro: 0 }
62//! );
63//!
64//! if let Some(age) = '🦊'.age() {
65//! assert_eq!(age.actual().major, 9);
66//! assert_eq!(age.actual().minor, 0);
67//! assert_eq!(age.actual().micro, 0);
68//! }
69//!
70//! // Bidi
71//!
72//! let text = concat![
73//! "א",
74//! "ב",
75//! "ג",
76//! "a",
77//! "b",
78//! "c",
79//! ];
80//!
81//! assert!(!text.has_bidi_explicit());
82//! assert!(text.has_rtl());
83//! assert!(text.has_ltr());
84//!
85//! assert_eq!(text.chars().nth(0).unwrap().bidi_class(), BidiClass::RightToLeft);
86//! assert!(!text.chars().nth(0).unwrap().is_ltr());
87//! assert!(text.chars().nth(0).unwrap().is_rtl());
88//!
89//! assert_eq!(text.chars().nth(3).unwrap().bidi_class(), BidiClass::LeftToRight);
90//! assert!(text.chars().nth(3).unwrap().is_ltr());
91//! assert!(!text.chars().nth(3).unwrap().is_rtl());
92//!
93//! let bidi_info = BidiInfo::new(text, None);
94//! assert_eq!(bidi_info.paragraphs.len(), 1);
95//!
96//! let para = &bidi_info.paragraphs[0];
97//! assert_eq!(para.level.number(), 1);
98//! assert_eq!(para.level.is_rtl(), true);
99//!
100//! let line = para.range.clone();
101//! let display = bidi_info.reorder_line(para, line);
102//! assert_eq!(
103//! display,
104//! concat![
105//! "a",
106//! "b",
107//! "c",
108//! "ג",
109//! "ב",
110//! "א",
111//! ]
112//! );
113//!
114//! // Case
115//!
116//! assert_eq!(is_cased('A'), true);
117//! assert_eq!(is_cased('א'), false);
118//!
119//! // Normalization
120//!
121//! assert_eq!(compose('A', '\u{030A}'), Some('Å'));
122//!
123//! let s = "ÅΩ";
124//! let c = s.nfc().collect::<String>();
125//! assert_eq!(c, "ÅΩ");
126//!
127//! // Segmentation
128//!
129//! assert_eq!(
130//! Graphemes::new("a\u{310}e\u{301}o\u{308}\u{332}").collect::<Vec<&str>>(),
131//! &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"]
132//! );
133//!
134//! assert_eq!(
135//! Graphemes::new("a\r\nb🇺🇳🇮🇨").collect::<Vec<&str>>(),
136//! &["a", "\r\n", "b", "🇺🇳", "🇮🇨"]
137//! );
138//!
139//! assert_eq!(
140//! GraphemeIndices::new("a̐éö̲\r\n").collect::<Vec<(usize, &str)>>(),
141//! &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]
142//! );
143//!
144//! assert_eq!(
145//! Words::new(
146//! "The quick (\"brown\") fox can't jump 32.3 feet, right?",
147//! |s: &&str| s.chars().any(is_alphanumeric),
148//! ).collect::<Vec<&str>>(),
149//! &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"]
150//! );
151//!
152//! assert_eq!(
153//! WordBounds::new("The quick (\"brown\")  fox").collect::<Vec<&str>>(),
154//! &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"]
155//! );
156//!
157//! assert_eq!(
158//! WordBoundIndices::new("Brr, it's 29.3°F!").collect::<Vec<(usize, &str)>>(),
159//! &[
160//! (0, "Brr"),
161//! (3, ","),
162//! (4, " "),
163//! (5, "it's"),
164//! (9, " "),
165//! (10, "29.3"),
166//! (14, "°"),
167//! (16, "F"),
168//! (17, "!")
169//! ]
170//! );
171//! }
172//! ```
173
174pub use unic_bidi as bidi;
175pub use unic_char as char;
176pub use unic_emoji as emoji;
177pub use unic_idna as idna;
178pub use unic_normal as normal;
179pub use unic_segment as segment;
180pub use unic_ucd as ucd;
181
182/// The [Unicode version](https://www.unicode.org/versions/) of the data, re-exported from the `ucd` component.
183pub use crate::ucd::UNICODE_VERSION;
184
185mod pkg_info;
186pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION};