1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
// Copyright 2019 Robert Sayre // Licensed under the Apache License, Version 2.0 // http://www.apache.org/licenses/LICENSE-2.0 extern crate twitter_text_config; extern crate twitter_text_parser; extern crate unicode_normalization; extern crate idna; extern crate pest; pub mod extractor; pub mod hit_highlighter; pub mod autolinker; pub mod entity; pub mod validator; use twitter_text_config::Configuration; use twitter_text_config::Range; use extractor::Extract; use extractor::ValidatingExtractor; /** * A struct that represents a parsed tweet containing the length of the tweet, * its validity, display ranges etc. The name mirrors Twitter's Java implementation. */ #[derive(PartialEq, Eq, Hash, Debug, Clone, Copy)] pub struct TwitterTextParseResults { /// The weighted length is the number used to determine the tweet's length for the purposes of Twitter's limit of 280. Most characters count /// for 2 units, while a few ranges (like ASCII and Latin-1) count for 1. See [Twitter's blog post](https://blog.twitter.com/official/en_us/topics/product/2017/Giving-you-more-characters-to-express-yourself.html). pub weighted_length: i32, /// The weighted length expressed as a number relative to a limit of 1000. /// This value makes it easier to implement UI like Twitter's tweet-length meter. pub permillage: i32, /// Whether the tweet is valid: its weighted length must be under the configured limit, it must /// not be empty, and it must not contain invalid characters. pub is_valid: bool, /// The display range expressed in UTF-16. pub display_text_range: Range, /// The valid display range expressed in UTF-16. After the end of the valid range, clients /// typically stop highlighting entities, etc. pub valid_text_range: Range } impl TwitterTextParseResults { /// A new TwitterTextParseResults struct with all fields supplied as arguments. pub fn new(weighted_length: i32, permillage: i32, is_valid: bool, display_text_range: Range, valid_text_range: Range) -> TwitterTextParseResults { TwitterTextParseResults { weighted_length, permillage, is_valid, display_text_range, valid_text_range } } /// An invalid TwitterTextParseResults struct. This function produces the return value when /// empty text or invalid UTF-8 is supplied to parse(). pub fn empty() -> TwitterTextParseResults { TwitterTextParseResults { weighted_length: 0, permillage: 0, is_valid: false, display_text_range: Range::empty(), valid_text_range: Range::empty() } } } /** * Produce a [TwitterTextParseResults] struct from a [str]. If extract_urls is true, the weighted * length will give all URLs the weight supplied in [Configuration](twitter_text_configuration::Configuration), * regardless of their length. * * This function will allocate an NFC-normalized copy of the input string. If the text is already * NFC-normalized, [ValidatingExtractor::new_with_nfc_input] will be more efficient. */ pub fn parse(text: &str, config: &Configuration, extract_urls: bool) -> TwitterTextParseResults { let mut extractor = ValidatingExtractor::new(config); let input = extractor.prep_input(text); if extract_urls { extractor.extract_urls_with_indices(input.as_str()).parse_results } else { extractor.extract_scan(input.as_str()).parse_results } }