extern crate pest;
use entity::Entity;
use entity;
use extractor::Extract;
use extractor::Extractor;
/// Ordered list of `(name, value)` pairs rendered as HTML attributes on an anchor.
type Attributes = Vec<(String, String)>;

// Attribute names emitted on generated anchors.
const HREF: &str = "href";
const CLASS: &str = "class";
const TARGET: &str = "target";
const TITLE: &str = "title";

/// Default `class` attribute value for list links.
pub const DEFAULT_LIST_CLASS: &str = "tweet-url list-slug";
/// Default `class` attribute value for username links.
pub const DEFAULT_USERNAME_CLASS: &str = "tweet-url username";
/// Default `class` attribute value for hashtag links.
pub const DEFAULT_HASHTAG_CLASS: &str = "tweet-url hashtag";
/// Default `class` attribute value for cashtag links.
pub const DEFAULT_CASHTAG_CLASS: &str = "tweet-url cashtag";
/// Default prefix prepended to a username to build its link target.
pub const DEFAULT_USERNAME_URL_BASE: &str = "https://twitter.com/";
/// Default prefix prepended to `username/list-slug` to build a list link target.
pub const DEFAULT_LIST_URL_BASE: &str = "https://twitter.com/";
/// Default prefix prepended to a hashtag (`%23` is the URL-encoded `#`).
pub const DEFAULT_HASHTAG_URL_BASE: &str = "https://twitter.com/search?q=%23";
/// Default prefix prepended to a cashtag (`%24` is the URL-encoded `$`).
pub const DEFAULT_CASHTAG_URL_BASE: &str = "https://twitter.com/search?q=%24";
/// Default attributes placed on the `<span>` elements that hide URL fragments.
pub const DEFAULT_INVISIBLE_TAG_ATTRS: &str = "style='position:absolute;left:-9999px;'";
/// Configuration and state for turning entities found in text (URLs,
/// hashtags, mentions/lists, cashtags) into HTML anchors.
///
/// All string settings are borrowed for the lifetime `'a`.
pub struct Autolinker<'a> {
/// When true, every generated anchor gets a `rel="nofollow"` attribute.
pub no_follow: bool,
/// `class` attribute added to URL anchors; omitted when empty.
pub url_class: &'a str,
/// `target` attribute added to URL anchors; omitted when empty.
pub url_target: &'a str,
/// Name of an HTML tag wrapped around the leading symbol of an entity;
/// the symbol is emitted bare when this is empty.
pub symbol_tag: &'a str,
/// Name of an HTML tag wrapped around the text that follows the symbol;
/// the text is emitted bare when this is empty.
pub text_with_symbol_tag: &'a str,
/// `class` attribute used for list anchors.
pub list_class: &'a str,
/// `class` attribute used for username anchors.
pub username_class: &'a str,
/// `class` attribute used for hashtag anchors (` rtl` is appended when the
/// text contains right-to-left characters).
pub hashtag_class: &'a str,
/// `class` attribute used for cashtag anchors.
pub cashtag_class: &'a str,
/// Prefix prepended to a username to form the anchor's `href`.
pub username_url_base: &'a str,
/// Prefix prepended to the username plus list slug to form the anchor's `href`.
pub list_url_base: &'a str,
/// Prefix prepended to a hashtag value to form the anchor's `href`.
pub hashtag_url_base: &'a str,
/// Prefix prepended to a cashtag value to form the anchor's `href`.
pub cashtag_url_base: &'a str,
/// Raw attribute text placed inside the `<span ...>` elements that wrap the
/// hidden parts of an expanded URL.
pub invisible_tag_attrs: &'a str,
/// When true, the `@` (or full-width `＠`) symbol is placed inside the
/// anchor; when false it is written before the anchor.
pub username_include_symbol: bool,
// Extractor used by the `autolink*` methods to find entities in the text.
extractor: Extractor,
}
impl<'a> Autolinker<'a> {
    /// Creates an autolinker using the default CSS classes and URL bases.
    ///
    /// `no_follow` controls whether generated anchors carry `rel="nofollow"`.
    /// The internal extractor is configured not to extract URLs that lack a
    /// protocol.
    pub fn new(no_follow: bool) -> Autolinker<'a> {
        let mut extractor = Extractor::new();
        extractor.set_extract_url_without_protocol(false);
        Autolinker {
            no_follow,
            url_class: "",
            url_target: "",
            symbol_tag: "",
            text_with_symbol_tag: "",
            list_class: DEFAULT_LIST_CLASS,
            username_class: DEFAULT_USERNAME_CLASS,
            hashtag_class: DEFAULT_HASHTAG_CLASS,
            cashtag_class: DEFAULT_CASHTAG_CLASS,
            username_url_base: DEFAULT_USERNAME_URL_BASE,
            list_url_base: DEFAULT_LIST_URL_BASE,
            hashtag_url_base: DEFAULT_HASHTAG_URL_BASE,
            cashtag_url_base: DEFAULT_CASHTAG_URL_BASE,
            invisible_tag_attrs: DEFAULT_INVISIBLE_TAG_ATTRS,
            username_include_symbol: false,
            extractor,
        }
    }

    /// Appends `<a ATTRS>original_text</a>` to `buf`.
    ///
    /// Attribute names and values are HTML-escaped. `original_text` is written
    /// verbatim, so callers must pass text that is already escaped or is
    /// intentional markup. When `no_follow` is set, a `rel="nofollow"` pair is
    /// pushed onto `attributes` (the caller's vector is mutated).
    fn link_to_text(&self, _entity: &Entity, original_text: &str,
                    attributes: &mut Attributes, buf: &mut String) {
        if self.no_follow {
            attributes.push((String::from("rel"), String::from("nofollow")));
        }
        buf.push_str("<a");
        for (name, value) in attributes.iter() {
            buf.push(' ');
            buf.push_str(&escape_html(name));
            buf.push_str("=\"");
            buf.push_str(&escape_html(value));
            buf.push('"');
        }
        buf.push('>');
        buf.push_str(original_text);
        buf.push_str("</a>");
    }

    /// Appends an anchor for an entity that is introduced by a symbol
    /// (`#`, `$`, `@`, ...).
    ///
    /// `sym` and the HTML-escaped `original_text` are each optionally wrapped
    /// in `symbol_tag` / `text_with_symbol_tag`. The symbol goes inside the
    /// anchor unless it is an at-sign (`@` or U+FF20) and
    /// `username_include_symbol` is false, in which case it is written in
    /// front of the anchor.
    fn link_to_text_with_symbol(&self, entity: &Entity, sym: &str, original_text: &str,
                                attributes: &mut Attributes, buf: &mut String) {
        let tagged_symbol = if self.symbol_tag.is_empty() {
            String::from(sym)
        } else {
            format!("<{}>{}</{}>", self.symbol_tag, sym, self.symbol_tag)
        };
        let text = escape_html(original_text);
        let tagged_text = if self.text_with_symbol_tag.is_empty() {
            text
        } else {
            format!("<{}>{}</{}>", self.text_with_symbol_tag, text, self.text_with_symbol_tag)
        };
        let is_at_sign = sym.contains('@') || sym.contains('\u{FF20}');
        if self.username_include_symbol || !is_at_sign {
            self.link_to_text(entity, &(tagged_symbol + &tagged_text), attributes, buf);
        } else {
            buf.push_str(&tagged_symbol);
            self.link_to_text(entity, &tagged_text, attributes, buf);
        }
    }

    /// Appends an anchor for a hashtag entity.
    ///
    /// The symbol shown is the character of `text` at the entity's start
    /// index (counted in `char`s). ` rtl` is appended to the class when
    /// `text` contains right-to-left characters.
    fn link_to_hashtag(&self, entity: &Entity, text: &str, buf: &mut String) {
        let hash_char: String = text.chars().skip(entity.get_start() as usize).take(1).collect();
        let hashtag = entity.get_value();
        let class = if contains_rtl(text) {
            format!("{} rtl", self.hashtag_class)
        } else {
            String::from(self.hashtag_class)
        };
        let mut attrs: Attributes = vec![
            (HREF.to_string(), format!("{}{}", self.hashtag_url_base, hashtag)),
            (TITLE.to_string(), format!("#{}", hashtag)),
            (CLASS.to_string(), class),
        ];
        self.link_to_text_with_symbol(entity, &hash_char, hashtag, &mut attrs, buf);
    }

    /// Appends an anchor for a cashtag entity, always using `$` as the symbol.
    fn link_to_cashtag(&self, entity: &Entity, _text: &str, buf: &mut String) {
        let cashtag = entity.get_value();
        let mut attrs: Attributes = vec![
            (HREF.to_string(), format!("{}{}", self.cashtag_url_base, cashtag)),
            (TITLE.to_string(), format!("${}", cashtag)),
            (CLASS.to_string(), String::from(self.cashtag_class)),
        ];
        self.link_to_text_with_symbol(entity, "$", cashtag, &mut attrs, buf);
    }

    /// Appends an anchor for a mention, or for a list when the entity has a
    /// non-empty list slug.
    ///
    /// The symbol shown is the character of `text` at the entity's start
    /// index (counted in `char`s).
    fn link_to_mention_and_list(&self, entity: &Entity, text: &str, buf: &mut String) {
        let mut mention = String::from(entity.get_value());
        let at_char: String = text.chars().skip(entity.get_start() as usize).take(1).collect();
        let is_list =
            entity.get_type() == entity::Type::MENTION && !entity.get_list_slug().is_empty();
        let (class, url_base) = if is_list {
            mention.push_str(entity.get_list_slug());
            (self.list_class, self.list_url_base)
        } else {
            (self.username_class, self.username_url_base)
        };
        let mut attrs: Attributes = vec![
            (CLASS.to_string(), class.to_owned()),
            (HREF.to_string(), format!("{}{}", url_base, mention)),
        ];
        self.link_to_text_with_symbol(entity, &at_char, &mention, &mut attrs, buf);
    }

    /// Appends an anchor for a URL entity.
    ///
    /// The link text is the escaped URL, unless the entity carries both a
    /// display URL and an expanded URL. In that case, if the display URL
    /// (without ellipses) occurs inside the expanded URL, the full expanded
    /// URL is emitted with the parts outside the display URL wrapped in
    /// invisible spans; otherwise the display URL itself is used.
    fn link_to_url(&self, entity: &Entity, _text: &str, buf: &mut String) {
        let url = entity.get_value();
        let display_url = entity.get_display_url();
        let expanded_url = entity.get_expanded_url();
        let mut link_text = escape_html(url);
        if !display_url.is_empty() && !expanded_url.is_empty() {
            let display_url_sans_ellipses = display_url.replace("…", "");
            link_text = match expanded_url.find(&display_url_sans_ellipses) {
                Some(index) => {
                    // `find` yields a byte offset and `len` a byte length, so
                    // the expanded URL is split by bytes. Both positions are
                    // char boundaries because they delimit a matched substring.
                    let (before_display_url, rest) = expanded_url.split_at(index);
                    let after_display_url = &rest[display_url_sans_ellipses.len()..];
                    let preceding_ellipsis = if display_url.starts_with("…") { "…" } else { "" };
                    let following_ellipsis = if display_url.ends_with("…") { "…" } else { "" };
                    let invisible_span = format!("<span {}>", self.invisible_tag_attrs);

                    let mut sb = String::from("<span class='tco-ellipsis'>");
                    sb += preceding_ellipsis;
                    sb += &invisible_span;
                    // Spelled as an entity so the non-breaking space is not
                    // an invisible character in the source.
                    sb += "&nbsp;</span></span>";
                    sb += &invisible_span;
                    sb += &escape_html(before_display_url);
                    sb += "</span>";
                    sb += "<span class='js-display-url'>";
                    sb += &escape_html(&display_url_sans_ellipses);
                    sb += "</span>";
                    sb += &invisible_span;
                    sb += &escape_html(after_display_url);
                    sb += "</span>";
                    sb += "<span class='tco-ellipsis'>";
                    sb += &invisible_span;
                    sb += "&nbsp;</span>";
                    sb += following_ellipsis;
                    sb += "</span>";
                    sb
                }
                // NOTE(review): the display URL is inserted without HTML
                // escaping here, unlike the branch above; confirm that this
                // is intended.
                None => String::from(display_url),
            };
        }
        let mut attrs: Attributes = vec![(HREF.to_string(), String::from(url))];
        if !self.url_class.is_empty() {
            attrs.push((CLASS.to_string(), String::from(self.url_class)));
        }
        if !self.url_target.is_empty() {
            attrs.push((TARGET.to_string(), String::from(self.url_target)));
        }
        self.link_to_text(entity, &link_text, &mut attrs, buf);
    }

    /// Returns `text` with each of `entities` replaced by its HTML anchor.
    ///
    /// Entity start/end indices are interpreted as `char` offsets into
    /// `text`. Entities are processed in the order given and are expected to
    /// be sorted by start index and non-overlapping; an entity that starts
    /// before the end of the previous one contributes no preceding text
    /// instead of underflowing.
    pub fn autolink_entities(&self, text: &str, entities: &[Entity]) -> String {
        let mut buf = String::with_capacity(text.len() * 2);
        let mut offset = 0usize;
        for entity in entities {
            let start = entity.get_start() as usize;
            // Copy the plain text between the previous entity and this one.
            buf.extend(text.chars().skip(offset).take(start.saturating_sub(offset)));
            match entity.get_type() {
                entity::Type::URL => self.link_to_url(entity, text, &mut buf),
                entity::Type::HASHTAG => self.link_to_hashtag(entity, text, &mut buf),
                entity::Type::MENTION => self.link_to_mention_and_list(entity, text, &mut buf),
                entity::Type::CASHTAG => self.link_to_cashtag(entity, text, &mut buf),
            }
            offset = entity.get_end() as usize;
        }
        // Copy whatever follows the last entity.
        buf.extend(text.chars().skip(offset));
        buf
    }

    /// Escapes `<` and `>` in `original`, then links every entity the
    /// extractor finds in the escaped text.
    pub fn autolink(&self, original: &str) -> String {
        let text = escape_brackets(original);
        let entities = self.extractor.extract_entities_with_indices(&text);
        self.autolink_entities(&text, &entities)
    }

    /// Links only the mentions and lists found in `text`.
    pub fn autolink_usernames_and_lists(&self, text: &str) -> String {
        let entities = self.extractor.extract_mentions_or_lists_with_indices(text);
        self.autolink_entities(text, &entities)
    }

    /// Links only the hashtags found in `text`.
    pub fn autolink_hashtags(&self, text: &str) -> String {
        let entities = self.extractor.extract_hashtags(text);
        self.autolink_entities(text, &entities)
    }

    /// Links only the URLs found in `text`.
    pub fn autolink_urls(&self, text: &str) -> String {
        let entities = self.extractor.extract_urls_with_indices(text);
        self.autolink_entities(text, &entities)
    }

    /// Links only the cashtags found in `text`.
    pub fn autolink_cashtags(&self, text: &str) -> String {
        let entities = self.extractor.extract_cashtags(text);
        self.autolink_entities(text, &entities)
    }
}
/// Reports whether `s` contains at least one character from the code point
/// ranges U+0590..=U+05FF, U+0600..=U+06FF, U+0750..=U+077F or
/// U+FE70..=U+FEFF.
fn contains_rtl(s: &str) -> bool {
    s.chars().any(|ch| match ch {
        '\u{0590}'..='\u{05FF}'
        | '\u{0600}'..='\u{06FF}'
        | '\u{0750}'..='\u{077F}'
        | '\u{FE70}'..='\u{FEFF}' => true,
        _ => false,
    })
}
/// Returns a copy of `s` with the HTML-significant characters replaced by
/// entities: `>` → `&gt;`, `<` → `&lt;`, `&` → `&amp;`, `'` → `&#39;` and
/// `"` → `&quot;`. All other characters are copied unchanged.
fn escape_html(s: &str) -> String {
    let mut buf = String::with_capacity(s.len() * 2);
    for ch in s.chars() {
        match ch {
            '>' => buf.push_str("&gt;"),
            '<' => buf.push_str("&lt;"),
            '&' => buf.push_str("&amp;"),
            '\'' => buf.push_str("&#39;"),
            '"' => buf.push_str("&quot;"),
            _ => buf.push(ch),
        }
    }
    buf
}
/// Returns a copy of `s` with `>` replaced by `&gt;` and `<` replaced by
/// `&lt;`. All other characters, including `&` and quotes, are copied
/// unchanged.
fn escape_brackets(s: &str) -> String {
    let mut buf = String::with_capacity(s.len() + 32);
    for ch in s.chars() {
        match ch {
            '>' => buf.push_str("&gt;"),
            '<' => buf.push_str("&lt;"),
            _ => buf.push(ch),
        }
    }
    buf
}
#[cfg(test)]
mod tests {
    use super::*;

    // All five HTML-significant characters must come back as entities.
    #[test]
    fn test_escape_html() {
        let s = "foo <bar> baz & 'hmm' or \"hmm\"";
        assert_eq!(
            "foo &lt;bar&gt; baz &amp; &#39;hmm&#39; or &quot;hmm&quot;",
            escape_html(s)
        );
    }

    // Only angle brackets are replaced; `&` and quotes pass through.
    #[test]
    fn test_escape_brackets() {
        let s = "foo <bar> baz & 'hmm' or \"hmm\"";
        assert_eq!("foo &lt;bar&gt; baz & 'hmm' or \"hmm\"", escape_brackets(s));
    }
}