egg_mode/entities.rs
1// This Source Code Form is subject to the terms of the Mozilla Public
2// License, v. 2.0. If a copy of the MPL was not distributed with this
3// file, You can obtain one at http://mozilla.org/MPL/2.0/.
4
5//! Data structures containing extracted URL, mention, tag, and media information.
6//!
7//! These structures are meant to be received in an API call to describe the data they accompany.
8//! For example, a `UrlEntity` describes a hyperlink in a tweet or user description text, and a
9//! `HashtagEntity` describes a hashtag or stock symbol extracted from a tweet.
10//!
11//! For more information on the data in these structures, see Twitter's documentation for
12//! [Entities Object][ent] and [Extended Entities Object][ext-ent].
13//!
14//! [ent]: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/entities-object
15//! [ext-ent]: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/extended-entities-object
16//!
17//! ## Entity Ranges
18//!
19//! Entities that refer to elements within a text have a `range` field that contains the text span
20//! that is being referenced. The numbers in question are byte offsets, so if you have an entity
21//! that you'd like to slice out of the source text, you can use the indices directly in slicing
22//! operations:
23//!
24//! ```rust
25//! # use egg_mode::entities::HashtagEntity;
26//! # let entity = HashtagEntity { range: (0, 0), text: "".to_string() };
27//! # let text = "asdf";
28//! let slice = &text[entity.range.0..entity.range.1];
29//! ```
30//!
31//! ### Shortened, Display, and Expanded URLs
32//!
33//! URL and Media entities contain references to a URL within their parent text. However, due to
34//! the nature of how Twitter handles URLs in tweets and user bios, each entity struct has three
35//! URLs within it:
36//!
37//! - `url`: This is the `t.co` shortened URL as returned directly from twitter. This is what
38//! contributes to character count in tweets and user bios.
//! - `expanded_url`: This is the original URL the user entered in their tweet. While it is given
//!   to API clients, Twitter recommends still sending users to the shortened link, for analytics
41//! purposes. Twitter Web uses this field to supply hover-text for where the URL resolves to.
42//! - `display_url`: This is a truncated version of `expanded_url`, meant to be displayed inline
43//! with the parent text. This is useful to show users where the link resolves to, without
//!   potentially filling up a lot of space with the fully expanded URL.
45use mime;
46use serde::{Deserialize, Deserializer, Serialize};
47
48use crate::common::serde_via_string;
49
50///Represents a hashtag or symbol extracted from another piece of text.
51#[derive(Debug, Clone, Deserialize, Serialize)]
52pub struct HashtagEntity {
53 ///The byte offsets where the hashtag is located. The first index is the location of the # or $
54 ///character; the second is the location of the first character following the hashtag.
55 #[serde(rename = "indices")]
56 pub range: (usize, usize),
57 ///The text of the hashtag, without the leading # or $ character.
58 pub text: String,
59}
60
61///Represents a piece of media attached to a tweet.
62///
63///The information in this struct is subtly different depending on what media is being referenced,
64///and which entity container is holding this instance. For videos and GIFs, the `media_url` and
65///`media_url_https` fields each link to a thumbnail image of the media, typically of the first
66///frame. The real video information can be found on the `video_info` field, including various
67///encodings if available.
68///
69///Image links available in `media_url` and `media_url_https` can be obtained in different sizes by
70///appending a colon and one of the available sizes in the `MediaSizes` struct. For example, the
71///cropped thumbnail can be viewed by appending `:thumb` to the end of the URL, and the full-size
72///image can be viewed by appending `:large`.
73#[derive(Debug, Clone, Deserialize, Serialize)]
74pub struct MediaEntity {
75 ///A shortened URL to display to clients.
76 pub display_url: String,
77 ///An expanded version of `display_url`; links to the media display page.
78 pub expanded_url: String,
79 ///A numeric ID for the media.
80 pub id: u64,
81 ///The byte offsets where the media URL is located. The first index is the location of the
82 ///first character of the URL; the second is the location of the first character following the
83 ///URL.
84 #[serde(rename = "indices")]
85 pub range: (usize, usize),
86 ///A URL pointing directly to the media file. Uses HTTP as the protocol.
87 ///
88 ///For videos and GIFs, this link will be to a thumbnail of the media, and the real video link
89 ///will be contained in `video_info`.
90 pub media_url: String,
91 ///A URL pointing directly to the media file. Uses HTTPS as the protocol.
92 ///
93 ///For videos and GIFs, this link will be to a thumbnail of the media, and the real video link
94 ///will be contained in `video_info`.
95 pub media_url_https: String,
96 ///Various sizes available for the media file.
97 pub sizes: MediaSizes,
98 ///For tweets containing media that was originally associated with a different tweet, this
99 ///contains the ID of the original tweet.
100 pub source_status_id: Option<u64>,
101 ///The type of media being represented.
102 #[serde(rename = "type")]
103 pub media_type: MediaType,
104 ///The t.co link from the original text.
105 pub url: String,
106 ///For media entities corresponding to videos, this contains extra information about the linked
107 ///video.
108 pub video_info: Option<VideoInfo>,
109 ///Media alt text, if present.
110 pub ext_alt_text: Option<String>,
111}
112
113///Represents the types of media that can be attached to a tweet.
114#[derive(Debug, Copy, Clone, Deserialize, Serialize, PartialEq)]
115pub enum MediaType {
116 ///A static image.
117 #[serde(rename = "photo")]
118 Photo,
119 ///A video.
120 #[serde(rename = "video")]
121 Video,
122 ///An animated GIF, delivered as a video without audio.
123 #[serde(rename = "animated_gif")]
124 Gif,
125}
126
127///Represents the available sizes for a media file.
128#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
129pub struct MediaSizes {
130 ///Information for a thumbnail-sized version of the media.
131 pub thumb: MediaSize,
132 ///Information for a small-sized version of the media.
133 pub small: MediaSize,
134 ///Information for a medium-sized version of the media.
135 pub medium: MediaSize,
136 ///Information for a large-sized version of the media.
137 pub large: MediaSize,
138}
139
140///Represents how an image has been resized for a given size variant.
141#[derive(Debug, Copy, Clone, Deserialize, Serialize, PartialEq)]
142pub enum ResizeMode {
143 ///The media was resized to fit one dimension, keeping its aspect ratio.
144 #[serde(rename = "fit")]
145 Fit,
146 ///The media was cropped to fit a specific resolution.
147 #[serde(rename = "crop")]
148 Crop,
149}
150
151///Represents the dimensions of a media file.
152#[derive(Debug, Copy, Clone, Deserialize, Serialize)]
153pub struct MediaSize {
154 ///The size variant's width in pixels.
155 pub w: i32,
156 ///The size variant's height in pixels.
157 pub h: i32,
158 ///The method used to obtain the given dimensions.
159 pub resize: ResizeMode,
160}
161
162///Represents metadata specific to videos.
163#[derive(Debug, Clone, Deserialize, Serialize)]
164pub struct VideoInfo {
165 ///The aspect ratio of the video.
166 pub aspect_ratio: (i32, i32),
167 ///The duration of the video, in milliseconds.
168 ///
169 ///This field is not given for animated GIFs.
170 pub duration_millis: Option<i32>,
171 ///Information about various encodings available for the video.
172 pub variants: Vec<VideoVariant>,
173}
174
175///Represents information about a specific encoding of a video.
176#[derive(Debug, Clone, Deserialize, Serialize)]
177pub struct VideoVariant {
178 ///The bitrate of the video. This value is present for GIFs, but it will be zero.
179 pub bitrate: Option<i32>,
180 ///The file format of the video variant.
181 #[serde(with = "serde_via_string")]
182 pub content_type: mime::Mime,
183 ///The URL for the video variant.
184 pub url: String,
185}
186
187///Represents a link extracted from another piece of text.
188#[derive(Debug, Clone, Deserialize, Serialize)]
189pub struct UrlEntity {
190 ///A truncated URL meant to be displayed inline with the text.
191 #[serde(default)]
192 pub display_url: String,
193 ///The URL that the t.co URL resolves to.
194 ///
195 ///Meant to be used as hover-text when a user mouses over a link.
196 #[serde(default)]
197 pub expanded_url: Option<String>,
198 ///The byte offsets in the companion text where the URL was extracted from.
199 #[serde(rename = "indices")]
200 pub range: (usize, usize),
201 ///The t.co URL extracted from the companion text.
202 pub url: String,
203}
204
205///Represnts a user mention extracted from another piece of text.
206#[derive(Debug, Clone, Deserialize, Serialize)]
207pub struct MentionEntity {
208 ///Numeric ID of the mentioned user.
209 #[serde(deserialize_with = "nullable_id")] // Very rarely this field is null
210 pub id: u64,
211 ///The byte offsets where the user mention is located in the original text. The first index is
212 ///the location of the @ symbol; the second is the location of the first character following
213 ///the user screen name.
214 #[serde(rename = "indices")]
215 pub range: (usize, usize),
216 ///Display name of the mentioned user.
217 #[serde(deserialize_with = "nullable_str")] // Very rarely, this field is null
218 pub name: String,
219 ///Screen name of the mentioned user, without the leading @ symbol.
220 pub screen_name: String,
221}
222
223fn nullable_id<'de, D>(deserializer: D) -> Result<u64, D::Error>
224where
225 D: Deserializer<'de>,
226{
227 let opt = Option::deserialize(deserializer)?;
228 Ok(opt.unwrap_or_default())
229}
230
231fn nullable_str<'de, D>(deserializer: D) -> Result<String, D::Error>
232where
233 D: Deserializer<'de>,
234{
235 let opt = Option::deserialize(deserializer)?;
236 Ok(opt.unwrap_or_default())
237}