twitter_text/lib.rs
1// Copyright 2019 Robert Sayre
2// Licensed under the Apache License, Version 2.0
3// http://www.apache.org/licenses/LICENSE-2.0
4
5extern crate twitter_text_config;
6extern crate twitter_text_parser;
7extern crate unicode_normalization;
8extern crate idna;
9extern crate pest;
10
11pub mod extractor;
12pub mod hit_highlighter;
13pub mod autolinker;
14pub mod entity;
15pub mod validator;
16
17use twitter_text_config::Configuration;
18use twitter_text_config::Range;
19use extractor::Extract;
20use extractor::ValidatingExtractor;
21
22/**
23 * A struct that represents a parsed tweet containing the length of the tweet,
24 * its validity, display ranges etc. The name mirrors Twitter's Java implementation.
25 */
26#[derive(PartialEq, Eq, Hash, Debug, Clone, Copy)]
27pub struct TwitterTextParseResults {
28 /// The weighted length is the number used to determine the tweet's length for the purposes of Twitter's limit of 280. Most characters count
29 /// for 2 units, while a few ranges (like ASCII and Latin-1) count for 1. See [Twitter's blog post](https://blog.twitter.com/official/en_us/topics/product/2017/Giving-you-more-characters-to-express-yourself.html).
30 pub weighted_length: i32,
31
32 /// The weighted length expressed as a number relative to a limit of 1000.
33 /// This value makes it easier to implement UI like Twitter's tweet-length meter.
34 pub permillage: i32,
35
36 /// Whether the tweet is valid: its weighted length must be under the configured limit, it must
37 /// not be empty, and it must not contain invalid characters.
38 pub is_valid: bool,
39
40 /// The display range expressed in UTF-16.
41 pub display_text_range: Range,
42
43 /// The valid display range expressed in UTF-16. After the end of the valid range, clients
44 /// typically stop highlighting entities, etc.
45 pub valid_text_range: Range
46}
47
48impl TwitterTextParseResults {
49 /// A new TwitterTextParseResults struct with all fields supplied as arguments.
50 pub fn new(weighted_length: i32,
51 permillage: i32,
52 is_valid: bool,
53 display_text_range: Range,
54 valid_text_range: Range) -> TwitterTextParseResults {
55 TwitterTextParseResults {
56 weighted_length,
57 permillage,
58 is_valid,
59 display_text_range,
60 valid_text_range
61 }
62 }
63
64 /// An invalid TwitterTextParseResults struct. This function produces the return value when
65 /// empty text or invalid UTF-8 is supplied to parse().
66 pub fn empty() -> TwitterTextParseResults {
67 TwitterTextParseResults {
68 weighted_length: 0,
69 permillage: 0,
70 is_valid: false,
71 display_text_range: Range::empty(),
72 valid_text_range: Range::empty()
73 }
74 }
75}
76
77/**
78 * Produce a [TwitterTextParseResults] struct from a [str]. If extract_urls is true, the weighted
79 * length will give all URLs the weight supplied in [Configuration](twitter_text_configuration::Configuration),
80 * regardless of their length.
81 *
82 * This function will allocate an NFC-normalized copy of the input string. If the text is already
83 * NFC-normalized, [ValidatingExtractor::new_with_nfc_input] will be more efficient.
84 */
85pub fn parse(text: &str, config: &Configuration, extract_urls: bool) -> TwitterTextParseResults {
86 let mut extractor = ValidatingExtractor::new(config);
87 let input = extractor.prep_input(text);
88 if extract_urls {
89 extractor.extract_urls_with_indices(input.as_str()).parse_results
90 } else {
91 extractor.extract_scan(input.as_str()).parse_results
92 }
93}