1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
// Copyright 2012-2015 The Rust Project Developers. // Copyright 2017 The UNIC Project Developers. // // See the COPYRIGHT file at the top-level directory of this distribution. // // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your // option. This file may not be copied, modified, or distributed // except according to those terms. #![warn( bad_style, missing_debug_implementations, missing_docs, unconditional_recursion )] #![forbid(unsafe_code)] //! # UNIC — Unicode Text Segmentation Algorithms //! //! A component of [`unic`: Unicode and Internationalization Crates for Rust](/unic/). //! //! This UNIC component implements algorithms from [Unicode® Standard Annex #29 - //! Unicode Text Segmentation](http://unicode.org/reports/tr29/), used for detecting //! boundaries of text element boundaries, such as user-perceived characters (a.k.a. //! *Grapheme Clusters)*, *Words*, and *Sentences* (last one not implemented yet). //! //! # Examples //! //! ```rust //! # use unic_segment::{GraphemeIndices, Graphemes, WordBoundIndices, WordBounds, Words}; //! assert_eq!( //! Graphemes::new("a\u{310}e\u{301}o\u{308}\u{332}").collect::<Vec<&str>>(), //! &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"] //! ); //! //! assert_eq!( //! Graphemes::new("a\r\nb🇺🇳🇮🇨").collect::<Vec<&str>>(), //! &["a", "\r\n", "b", "🇺🇳", "🇮🇨"] //! ); //! //! assert_eq!( //! GraphemeIndices::new("a̐éö̲\r\n").collect::<Vec<(usize, &str)>>(), //! &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")] //! ); //! //! fn has_alphanumeric(s: &&str) -> bool { //! s.chars().any(|ch| ch.is_alphanumeric()) //! } //! //! assert_eq!( //! Words::new( //! "The quick (\"brown\") fox can't jump 32.3 feet, right?", //! has_alphanumeric, //! ).collect::<Vec<&str>>(), //! &["The", "quick", "brown", "fox", "can't", "jump", "32.3", "feet", "right"] //! ); //! //! assert_eq!( //! WordBounds::new("The quick (\"brown\") fox").collect::<Vec<&str>>(), //! &["The", " ", "quick", " ", "(", "\"", "brown", "\"", ")", " ", " ", "fox"] //! ); //! //! assert_eq!( //! WordBoundIndices::new("Brr, it's 29.3°F!").collect::<Vec<(usize, &str)>>(), //! &[ //! (0, "Brr"), //! (3, ","), //! (4, " "), //! (5, "it's"), //! (9, " "), //! (10, "29.3"), //! (14, "°"), //! (16, "F"), //! (17, "!") //! ] //! ); //! ``` pub use unic_ucd_segment::UNICODE_VERSION; mod pkg_info; pub use crate::pkg_info::{PKG_DESCRIPTION, PKG_NAME, PKG_VERSION}; mod grapheme; pub use crate::grapheme::{GraphemeCursor, GraphemeIncomplete, GraphemeIndices, Graphemes}; mod word; pub use crate::word::{WordBoundIndices, WordBounds, Words};