1extern crate pest;
6use entity::Entity;
7use entity;
8use extractor::Extract;
9use extractor::Extractor;
10
/// Ordered list of `(name, value)` pairs rendered as attributes of a generated `<a>` tag.
type Attributes = Vec<(String, String)>;

// Attribute names used when building anchors.
const HREF: &str = "href";
const CLASS: &str = "class";
const TARGET: &str = "target";
const TITLE: &str = "title";

/// Default `class` value for links to lists.
pub const DEFAULT_LIST_CLASS: &str = "tweet-url list-slug";

/// Default `class` value for links to usernames.
pub const DEFAULT_USERNAME_CLASS: &str = "tweet-url username";

/// Default `class` value for links to hashtags.
pub const DEFAULT_HASHTAG_CLASS: &str = "tweet-url hashtag";

/// Default `class` value for links to cashtags.
pub const DEFAULT_CASHTAG_CLASS: &str = "tweet-url cashtag";

/// Default `href` prefix for username links; the username is appended to it.
pub const DEFAULT_USERNAME_URL_BASE: &str = "https://twitter.com/";

/// Default `href` prefix for list links; the user and list slug are appended to it.
pub const DEFAULT_LIST_URL_BASE: &str = "https://twitter.com/";

/// Default `href` prefix for hashtag links (`%23` is the URL-encoded `#`).
pub const DEFAULT_HASHTAG_URL_BASE: &str = "https://twitter.com/search?q=%23";

/// Default `href` prefix for cashtag links (`%24` is the URL-encoded `$`).
pub const DEFAULT_CASHTAG_URL_BASE: &str = "https://twitter.com/search?q=%24";

/// Default attributes placed on the `<span>` elements that hide parts of an expanded URL.
pub const DEFAULT_INVISIBLE_TAG_ATTRS: &str = "style='position:absolute;left:-9999px;'";
61
62pub struct Autolinker<'a> {
66 pub no_follow: bool,
67 pub url_class: &'a str,
68 pub url_target: &'a str,
69 pub symbol_tag: &'a str,
70 pub text_with_symbol_tag: &'a str,
71 pub list_class: &'a str,
72 pub username_class: &'a str,
73 pub hashtag_class: &'a str,
74 pub cashtag_class: &'a str,
75 pub username_url_base: &'a str,
76 pub list_url_base: &'a str,
77 pub hashtag_url_base: &'a str,
78 pub cashtag_url_base: &'a str,
79 pub invisible_tag_attrs: &'a str,
80 pub username_include_symbol: bool,
81 extractor: Extractor,
82}
83
84impl<'a> Autolinker<'a> {
85 pub fn new(no_follow: bool) -> Autolinker<'a> {
87 let mut extractor = Extractor::new();
88 extractor.set_extract_url_without_protocol(false);
89 Autolinker {
90 no_follow,
91 url_class: "",
92 url_target: "",
93 symbol_tag: "",
94 text_with_symbol_tag: "",
95 list_class: DEFAULT_LIST_CLASS,
96 username_class: DEFAULT_USERNAME_CLASS,
97 hashtag_class: DEFAULT_HASHTAG_CLASS,
98 cashtag_class: DEFAULT_CASHTAG_CLASS,
99 username_url_base: DEFAULT_USERNAME_URL_BASE,
100 list_url_base: DEFAULT_LIST_URL_BASE,
101 hashtag_url_base: DEFAULT_HASHTAG_URL_BASE,
102 cashtag_url_base: DEFAULT_CASHTAG_URL_BASE,
103 invisible_tag_attrs: DEFAULT_INVISIBLE_TAG_ATTRS,
104 username_include_symbol: false,
105 extractor,
106 }
107 }
108
109 fn link_to_text(&self, entity: &Entity, original_text: &str,
110 attributes: &mut Attributes, buf: &mut String) {
111 if self.no_follow {
112 attributes.push((String::from("rel"), String::from("nofollow")));
113 }
114
115 let text = original_text;
116 buf.push_str("<a");
126 for (k, v) in attributes {
127 buf.push(' ');
128 buf.push_str(escape_html(k).as_str());
129 buf.push_str("=\"");
130 buf.push_str(escape_html(v).as_str());
131 buf.push('"');
132 }
133 buf.push('>');
134 buf.push_str(text);
135 buf.push_str("</a>");
136 }
137
138 fn link_to_text_with_symbol(&self, entity: &Entity, sym: &str, original_text: &str,
139 attributes: &mut Attributes, buf: &mut String) {
140 let tagged_symbol = match self.symbol_tag {
141 "" => String::from(sym),
142 _ => format!("<{}>{}</{}>", self.symbol_tag, sym, self.symbol_tag)
143 };
144 let text = escape_html(original_text);
145 let tagged_text = match self.text_with_symbol_tag {
146 "" => text,
147 _ => format!("<{}>{}</{}>", self.text_with_symbol_tag, text, self.text_with_symbol_tag)
148 };
149 let inc_sym = self.username_include_symbol || !(sym.contains('@') || sym.contains('\u{FF20}'));
150
151 if inc_sym {
152 self.link_to_text(entity, &(tagged_symbol + &tagged_text), attributes, buf);
153 } else {
154 buf.push_str(tagged_symbol.as_str());
155 self.link_to_text(entity, tagged_text.as_str(), attributes, buf);
156 }
157 }
158
159 fn link_to_hashtag(&self, entity: &Entity, text: &str, buf: &mut String) {
160 let hash_char = text.chars().skip(entity.get_start() as usize).take(1).collect::<String>();
161 let hashtag = entity.get_value();
162 let mut attrs: Attributes = Vec::new();
163 attrs.push((HREF.to_string(), String::from(self.hashtag_url_base.to_owned() + hashtag)));
164 attrs.push((TITLE.to_string(), String::from("#".to_owned() + hashtag)));
165
166 if contains_rtl(text) {
167 attrs.push((CLASS.to_string(), String::from(self.hashtag_class.to_owned() + " rtl")));
168 } else {
169 attrs.push((CLASS.to_string(), String::from(self.hashtag_class)));
170 }
171 self.link_to_text_with_symbol(entity, hash_char.as_str(), hashtag, &mut attrs, buf);
172 }
173
174 fn link_to_cashtag(&self, entity: &Entity, text: &str, buf: &mut String) {
175 let cashtag = entity.get_value();
176 let mut attrs: Attributes = Vec::new();
177 attrs.push((HREF.to_string(), self.cashtag_url_base.to_owned() + cashtag));
178 attrs.push((TITLE.to_string(), "$".to_owned() + cashtag));
179 attrs.push((CLASS.to_string(), String::from(self.cashtag_class)));
180
181 self.link_to_text_with_symbol(entity, "$", cashtag, &mut attrs, buf);
182 }
183
184 fn link_to_mention_and_list(&self, entity: &Entity, text: &str, buf: &mut String) {
185 let mut mention = String::from(entity.get_value());
186 let at_char = text.chars().skip(entity.get_start() as usize).take(1).collect::<String>();
187 let mut attrs: Attributes = Vec::new();
188
189 if entity.get_type() == entity::Type::MENTION && !entity.get_list_slug().is_empty() {
190 mention.push_str(entity.get_list_slug());
191 attrs.push((CLASS.to_string(), self.list_class.to_owned()));
192 attrs.push((HREF.to_string(), self.list_url_base.to_owned() + &mention));
193 } else {
194 attrs.push((CLASS.to_string(), self.username_class.to_owned()));
195 attrs.push((HREF.to_string(), self.username_url_base.to_owned() + &mention));
196 }
197
198 self.link_to_text_with_symbol(entity, at_char.as_str(), mention.as_str(), &mut attrs, buf);
199 }
200
201 fn link_to_url(&self, entity: &Entity, text: &str, buf: &mut String) {
202 let url = entity.get_value();
203 let mut link_text = escape_html(url);
204 if !entity.get_display_url().is_empty() && !entity.get_expanded_url().is_empty() {
205 let display_url_sans_ellipses = entity.get_display_url().replace("…", "");
247 let index = entity.get_expanded_url().find(&display_url_sans_ellipses);
248 if let Some(display_url_index_in_expanded_url) = index {
249 let before_display_url = entity.get_expanded_url().chars()
250 .take(display_url_index_in_expanded_url).collect::<String>();
251 let after_display_url = entity.get_expanded_url().chars().skip(
252 display_url_index_in_expanded_url + display_url_sans_ellipses.len()).collect::<String>();
253 let preceding_ellipsis = if entity.get_display_url().starts_with("…") {
254 "…"
255 } else {
256 ""
257 };
258 let following_ellipsis = if entity.get_display_url().ends_with("…") {
259 "…"
260 } else {
261 ""
262 };
263 let invisible_span = "<span ".to_owned() + self.invisible_tag_attrs + ">";
264
265 let mut sb = String::from("<span class='tco-ellipsis'>");
266 sb += preceding_ellipsis;
267 sb += &invisible_span;
268 sb += " </span></span>";
269 sb += &invisible_span;
270 sb += &escape_html(&before_display_url);
271 sb += "</span>";
272 sb += "<span class='js-display-url'>";
273 sb += &escape_html(&display_url_sans_ellipses);
274 sb += "</span>";
275 sb += &invisible_span;
276 sb += &escape_html(&after_display_url);
277 sb += "</span>";
278 sb += "<span class='tco-ellipsis'>";
279 sb += &invisible_span;
280 sb += " </span>";
281 sb += following_ellipsis;
282 sb += "</span>";
283
284 link_text = sb;
285 } else {
286 link_text = String::from(entity.get_display_url());
287 }
288 }
289
290 let mut attrs: Attributes = Vec::new();
291 attrs.push((HREF.to_string(), String::from(url)));
292 if !self.url_class.is_empty() {
293 attrs.push((CLASS.to_string(), String::from(self.url_class)));
294 }
295 if !self.url_target.is_empty() {
296 attrs.push((TARGET.to_string(), String::from(self.url_target)));
297 }
298 self.link_to_text(entity, &link_text, &mut attrs, buf);
299 }
300
301 pub fn autolink_entities(&self, text: &str, entities: &Vec<Entity>) -> String {
302 let mut buf = String::with_capacity(text.len() * 2);
303 let mut offset = 0usize;
304 for entity in entities {
305 buf += &text.chars().skip(offset).take(entity.get_start() as usize - offset).collect::<String>();
306 match entity.get_type() {
307 entity::Type::URL => self.link_to_url(entity, text, &mut buf),
308 entity::Type::HASHTAG => self.link_to_hashtag(entity, text, &mut buf),
309 entity::Type::MENTION => self.link_to_mention_and_list(entity, text, &mut buf),
310 entity::Type::CASHTAG => self.link_to_cashtag(entity, text, &mut buf),
311 }
312 offset = entity.get_end() as usize;
313 }
314 buf += &text.chars().skip(offset).collect::<String>();
315 buf
316 }
317
318 pub fn autolink(&self, original: &str) -> String {
320 let text = escape_brackets(original);
321 let entities = self.extractor.extract_entities_with_indices(&text);
322 self.autolink_entities(&text, &entities)
323 }
324
325 pub fn autolink_usernames_and_lists(&self, text: &str) -> String {
330 let entities = self.extractor.extract_mentions_or_lists_with_indices(text);
331 self.autolink_entities(text, &entities)
332 }
333
334 pub fn autolink_hashtags(&self, text: &str) -> String {
338 let entities = self.extractor.extract_hashtags(text);
339 self.autolink_entities(text, &entities)
340 }
341
342 pub fn autolink_urls(&self, text: &str) -> String {
346 let entities = self.extractor.extract_urls_with_indices(text);
347 self.autolink_entities(text, &entities)
348 }
349
350 pub fn autolink_cashtags(&self, text: &str) -> String {
354 let entities = self.extractor.extract_cashtags(text);
355 self.autolink_entities(text, &entities)
356 }
357}
358
/// Reports whether `s` contains at least one character from the code point ranges
/// U+0600–U+06FF, U+0750–U+077F, U+0590–U+05FF or U+FE70–U+FEFF.
fn contains_rtl(s: &str) -> bool {
    // Inclusive (first, last) bounds of every range that counts as right-to-left.
    const RTL_RANGES: [(char, char); 4] = [
        ('\u{0600}', '\u{06FF}'),
        ('\u{0750}', '\u{077F}'),
        ('\u{0590}', '\u{05FF}'),
        ('\u{FE70}', '\u{FEFF}'),
    ];

    s.chars().any(|candidate| {
        RTL_RANGES
            .iter()
            .any(|&(first, last)| first <= candidate && candidate <= last)
    })
}
371
/// Returns a copy of `s` with the HTML-significant characters replaced by character
/// references: `<` → `&lt;`, `>` → `&gt;`, `&` → `&amp;`, `'` → `&#39;`, `"` → `&quot;`.
/// Every other character is copied unchanged.
fn escape_html(s: &str) -> String {
    // Doubling the capacity leaves room for a fair number of expansions up front.
    let mut buf = String::with_capacity(s.len() * 2);
    for ch in s.chars() {
        match ch {
            '>' => buf.push_str("&gt;"),
            '<' => buf.push_str("&lt;"),
            '&' => buf.push_str("&amp;"),
            '\'' => buf.push_str("&#39;"),
            '"' => buf.push_str("&quot;"),
            _ => buf.push(ch),
        }
    }
    buf
}
403
/// Returns a copy of `s` with only the angle brackets replaced by character references:
/// `<` → `&lt;` and `>` → `&gt;`. Every other character, including `&` and quotes, is
/// copied unchanged.
fn escape_brackets(s: &str) -> String {
    // A little headroom so a handful of brackets does not force a reallocation.
    let mut buf = String::with_capacity(s.len() + 32);
    for ch in s.chars() {
        match ch {
            '>' => buf.push_str("&gt;"),
            '<' => buf.push_str("&lt;"),
            _ => buf.push(ch),
        }
    }
    buf
}
429
#[cfg(test)]
mod tests {
    use super::*;

    /// All five HTML-significant characters are replaced by character references.
    #[test]
    fn test_escape_html() {
        let s = "foo <bar> baz & 'hmm' or \"hmm\"";
        assert_eq!(
            "foo &lt;bar&gt; baz &amp; &#39;hmm&#39; or &quot;hmm&quot;",
            escape_html(s)
        );
    }

    /// Only the angle brackets are replaced; `&` and quotes pass through unchanged.
    #[test]
    fn test_escape_brackets() {
        let s = "foo <bar> baz & 'hmm' or \"hmm\"";
        assert_eq!(
            "foo &lt;bar&gt; baz & 'hmm' or \"hmm\"",
            escape_brackets(s)
        );
    }
}