egg_mode/
entities.rs

1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5//! Data structures containing extracted URL, mention, tag, and media information.
6//!
7//! These structures are meant to be received in an API call to describe the data they accompany.
8//! For example, a `UrlEntity` describes a hyperlink in a tweet or user description text, and a
9//! `HashtagEntity` describes a hashtag or stock symbol extracted from a tweet.
10//!
11//! For more information on the data in these structures, see Twitter's documentation for
12//! [Entities Object][ent] and [Extended Entities Object][ext-ent].
13//!
14//! [ent]: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/entities-object
15//! [ext-ent]: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
16//!
17//! ## Entity Ranges
18//!
19//! Entities that refer to elements within a text have a `range` field that contains the text span
20//! that is being referenced. The numbers in question are byte offsets, so if you have an entity
21//! that you'd like to slice out of the source text, you can use the indices directly in slicing
22//! operations:
23//!
24//! ```rust
25//! # use egg_mode::entities::HashtagEntity;
26//! # let entity = HashtagEntity { range: (0, 0), text: "".to_string() };
27//! # let text = "asdf";
28//! let slice = &text[entity.range.0..entity.range.1];
29//! ```
30//!
31//! ### Shortened, Display, and Expanded URLs
32//!
33//! URL and Media entities contain references to a URL within their parent text. However, due to
34//! the nature of how Twitter handles URLs in tweets and user bios, each entity struct has three
35//! URLs within it:
36//!
//! - `url`: This is the `t.co` shortened URL as returned directly from Twitter. This is what
//!   contributes to character count in tweets and user bios.
//! - `expanded_url`: This is the original URL the user entered in their tweet. While it is given
//!   to API clients, Twitter recommends still sending users to the shortened link, for analytics
//!   purposes. Twitter Web uses this field to supply hover-text for where the URL resolves to.
//! - `display_url`: This is a truncated version of `expanded_url`, meant to be displayed inline
//!   with the parent text. This is useful to show users where the link resolves to, without
//!   potentially filling up a lot of space with the fully expanded URL.
45use mime;
46use serde::{Deserialize, Deserializer, Serialize};
47
48use crate::common::serde_via_string;
49
50///Represents a hashtag or symbol extracted from another piece of text.
51#[derive(Debug, Clone, Deserialize, Serialize)]
52pub struct HashtagEntity {
53    ///The byte offsets where the hashtag is located. The first index is the location of the # or $
54    ///character; the second is the location of the first character following the hashtag.
55    #[serde(rename = "indices")]
56    pub range: (usize, usize),
57    ///The text of the hashtag, without the leading # or $ character.
58    pub text: String,
59}
60
61///Represents a piece of media attached to a tweet.
62///
63///The information in this struct is subtly different depending on what media is being referenced,
64///and which entity container is holding this instance. For videos and GIFs, the `media_url` and
65///`media_url_https` fields each link to a thumbnail image of the media, typically of the first
66///frame. The real video information can be found on the `video_info` field, including various
67///encodings if available.
68///
69///Image links available in `media_url` and `media_url_https` can be obtained in different sizes by
70///appending a colon and one of the available sizes in the `MediaSizes` struct. For example, the
71///cropped thumbnail can be viewed by appending `:thumb` to the end of the URL, and the full-size
72///image can be viewed by appending `:large`.
73#[derive(Debug, Clone, Deserialize, Serialize)]
74pub struct MediaEntity {
75    ///A shortened URL to display to clients.
76    pub display_url: String,
77    ///An expanded version of `display_url`; links to the media display page.
78    pub expanded_url: String,
79    ///A numeric ID for the media.
80    pub id: u64,
81    ///The byte offsets where the media URL is located. The first index is the location of the
82    ///first character of the URL; the second is the location of the first character following the
83    ///URL.
84    #[serde(rename = "indices")]
85    pub range: (usize, usize),
86    ///A URL pointing directly to the media file. Uses HTTP as the protocol.
87    ///
88    ///For videos and GIFs, this link will be to a thumbnail of the media, and the real video link
89    ///will be contained in `video_info`.
90    pub media_url: String,
91    ///A URL pointing directly to the media file. Uses HTTPS as the protocol.
92    ///
93    ///For videos and GIFs, this link will be to a thumbnail of the media, and the real video link
94    ///will be contained in `video_info`.
95    pub media_url_https: String,
96    ///Various sizes available for the media file.
97    pub sizes: MediaSizes,
98    ///For tweets containing media that was originally associated with a different tweet, this
99    ///contains the ID of the original tweet.
100    pub source_status_id: Option<u64>,
101    ///The type of media being represented.
102    #[serde(rename = "type")]
103    pub media_type: MediaType,
104    ///The t.co link from the original text.
105    pub url: String,
106    ///For media entities corresponding to videos, this contains extra information about the linked
107    ///video.
108    pub video_info: Option<VideoInfo>,
109    ///Media alt text, if present.
110    pub ext_alt_text: Option<String>,
111}
112
113///Represents the types of media that can be attached to a tweet.
114#[derive(Debug, Copy, Clone, Deserialize, Serialize, PartialEq)]
115pub enum MediaType {
116    ///A static image.
117    #[serde(rename = "photo")]
118    Photo,
119    ///A video.
120    #[serde(rename = "video")]
121    Video,
122    ///An animated GIF, delivered as a video without audio.
123    #[serde(rename = "animated_gif")]
124    Gif,
125}
126
127///Represents the available sizes for a media file.
128#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
129pub struct MediaSizes {
130    ///Information for a thumbnail-sized version of the media.
131    pub thumb: MediaSize,
132    ///Information for a small-sized version of the media.
133    pub small: MediaSize,
134    ///Information for a medium-sized version of the media.
135    pub medium: MediaSize,
136    ///Information for a large-sized version of the media.
137    pub large: MediaSize,
138}
139
140///Represents how an image has been resized for a given size variant.
141#[derive(Debug, Copy, Clone, Deserialize, Serialize, PartialEq)]
142pub enum ResizeMode {
143    ///The media was resized to fit one dimension, keeping its aspect ratio.
144    #[serde(rename = "fit")]
145    Fit,
146    ///The media was cropped to fit a specific resolution.
147    #[serde(rename = "crop")]
148    Crop,
149}
150
151///Represents the dimensions of a media file.
152#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
153pub struct MediaSize {
154    ///The size variant's width in pixels.
155    pub w: i32,
156    ///The size variant's height in pixels.
157    pub h: i32,
158    ///The method used to obtain the given dimensions.
159    pub resize: ResizeMode,
160}
161
162///Represents metadata specific to videos.
163#[derive(Debug, Clone, Deserialize, Serialize)]
164pub struct VideoInfo {
165    ///The aspect ratio of the video.
166    pub aspect_ratio: (i32, i32),
167    ///The duration of the video, in milliseconds.
168    ///
169    ///This field is not given for animated GIFs.
170    pub duration_millis: Option<i32>,
171    ///Information about various encodings available for the video.
172    pub variants: Vec<VideoVariant>,
173}
174
175///Represents information about a specific encoding of a video.
176#[derive(Debug, Clone, Deserialize, Serialize)]
177pub struct VideoVariant {
178    ///The bitrate of the video. This value is present for GIFs, but it will be zero.
179    pub bitrate: Option<i32>,
180    ///The file format of the video variant.
181    #[serde(with = "serde_via_string")]
182    pub content_type: mime::Mime,
183    ///The URL for the video variant.
184    pub url: String,
185}
186
187///Represents a link extracted from another piece of text.
188#[derive(Debug, Clone, Deserialize, Serialize)]
189pub struct UrlEntity {
190    ///A truncated URL meant to be displayed inline with the text.
191    #[serde(default)]
192    pub display_url: String,
193    ///The URL that the t.co URL resolves to.
194    ///
195    ///Meant to be used as hover-text when a user mouses over a link.
196    #[serde(default)]
197    pub expanded_url: Option<String>,
198    ///The byte offsets in the companion text where the URL was extracted from.
199    #[serde(rename = "indices")]
200    pub range: (usize, usize),
201    ///The t.co URL extracted from the companion text.
202    pub url: String,
203}
204
205///Represnts a user mention extracted from another piece of text.
206#[derive(Debug, Clone, Deserialize, Serialize)]
207pub struct MentionEntity {
208    ///Numeric ID of the mentioned user.
209    #[serde(deserialize_with = "nullable_id")] // Very rarely this field is null
210    pub id: u64,
211    ///The byte offsets where the user mention is located in the original text. The first index is
212    ///the location of the @ symbol; the second is the location of the first character following
213    ///the user screen name.
214    #[serde(rename = "indices")]
215    pub range: (usize, usize),
216    ///Display name of the mentioned user.
217    #[serde(deserialize_with = "nullable_str")] // Very rarely, this field is null
218    pub name: String,
219    ///Screen name of the mentioned user, without the leading @ symbol.
220    pub screen_name: String,
221}
222
223fn nullable_id<'de, D>(deserializer: D) -> Result<u64, D::Error>
224where
225    D: Deserializer<'de>,
226{
227    let opt = Option::deserialize(deserializer)?;
228    Ok(opt.unwrap_or_default())
229}
230
231fn nullable_str<'de, D>(deserializer: D) -> Result<String, D::Error>
232where
233    D: Deserializer<'de>,
234{
235    let opt = Option::deserialize(deserializer)?;
236    Ok(opt.unwrap_or_default())
237}