url_bot_rs/
message.rs

1use irc::client::prelude::*;
2use std::{
3    iter,
4    collections::HashSet,
5};
6use unicode_segmentation::UnicodeSegmentation;
7use reqwest::Url;
8use regex::Regex;
9use log::{info, error, trace};
10use lazy_static::lazy_static;
11
12use crate::{
13    feat, param,
14    http::resolve_url,
15    sqlite::{Database, NewLogEntry},
16    config::Rtd,
17    tld::TLD,
18    plugins::TITLE_PLUGINS,
19};
20
21pub fn handle_message(client: &IrcClient, message: &Message, rtd: &mut Rtd, db: &Database) {
22    trace!("{:?}", message.command);
23
24    let sender = message.source_nickname();
25    let target = message.response_target();
26
27    match &message.command {
28        Command::KICK(chan, nick, _) => kick(client, rtd, chan, nick),
29        Command::INVITE(nick, chan) => invite(client, rtd, nick, chan),
30        Command::PRIVMSG(tgt, msg) => {
31            let sender = sender.unwrap();
32            let target = target.unwrap_or(tgt);
33            let message = Msg::new(rtd, sender, target, msg);
34            privmsg(client, rtd, db, &message)
35        },
36        _ => {},
37    };
38}
39
40fn kick(client: &IrcClient, rtd: &mut Rtd, chan: &str, nick: &str) {
41    if !feat!(rtd, autosave) {
42        return;
43    }
44
45    if nick != client.current_nickname() {
46        return;
47    }
48
49    info!("kicked from {}", chan);
50
51    rtd.conf.remove_channel(chan);
52}
53
54fn invite(client: &IrcClient, rtd: &mut Rtd, nick: &str, chan: &str) {
55    if !feat!(rtd, invite) {
56        return;
57    }
58
59    if nick != client.current_nickname() {
60        return;
61    }
62
63    info!("invited to channel: {}", chan);
64
65    if let Err(e) = client.send_join(chan) {
66        error!("error joining channel: {}", e);
67    } else {
68        info!("joined {}", chan);
69
70        if feat!(rtd, autosave) {
71            rtd.conf.add_channel(chan.to_string());
72        };
73    };
74}
75
/// Outcome of processing one URL found in a message.
#[derive(Debug, PartialEq)]
enum TitleResp {
    /// Formatted response text for a URL whose title was obtained.
    Title(String),
    /// Text of the error raised while resolving a URL.
    Error(String),
}
81
/// A received `PRIVMSG`, borrowed from the underlying IRC message, together
/// with flags derived from it in `Msg::new`.
#[derive(Debug)]
struct Msg<'a> {
    /// `true` when `target` starts with `#`.
    is_chanmsg: bool,
    /// `true` when `text` looks like it mentions our configured nickname.
    is_ping: bool,
    /// Where responses are sent.
    target: &'a str,
    /// Nickname the message came from.
    sender: &'a str,
    /// Message body.
    text: &'a str,
}
90
91impl<'a> Msg<'a> {
92    fn new(rtd: &Rtd, sender: &'a str, target: &'a str, text: &'a str) -> Msg<'a> {
93        let our_nick = rtd.conf.client.nickname.as_ref().unwrap();
94
95        Msg {
96            is_chanmsg: target.starts_with('#'),
97            is_ping: is_ping(our_nick, text),
98            sender,
99            target,
100            text,
101        }
102    }
103}
104
105fn privmsg(client: &IrcClient, rtd: &Rtd, db: &Database, msg: &Msg) {
106    // ignore messages sent to status channels
107    if param!(rtd, status_channels).contains(&msg.target.to_string()) {
108        if msg.is_ping || contains_urls(msg.text) {
109            let m = format!("ignoring messages in channel {}", msg.target);
110            client.send_privmsg(&msg.sender, m).unwrap();
111        }
112        return;
113    }
114
115    let titles: Vec<_> = process_titles(rtd, db, msg).collect();
116
117    for resp in &titles {
118        match resp {
119            TitleResp::Title(t) => respond(client, rtd, msg, t),
120            TitleResp::Error(e) => respond_error(client, rtd, msg, e),
121        }
122    }
123
124    // if we had no url message and got a ping send nick response
125    if titles.is_empty() && msg.is_ping {
126        respond(client, rtd, msg, &param!(rtd, nick_response_str));
127    }
128
129}
130
131/// Run available plugins on a single URL, return the first successful title.
132fn process_plugins(rtd: &Rtd, url: &Url) -> Option<String> {
133    let result: String = TITLE_PLUGINS
134        .iter()
135        .filter(|p| p.check(&rtd.conf.plugins, url))
136        .filter_map(|p| p.evaluate(rtd, url).ok())
137        .take(1)
138        .collect();
139
140    if result.is_empty() {
141        None
142    } else {
143        Some(result)
144    }
145}
146
147/// find titles in a message and generate responses
148fn process_titles(rtd: &Rtd, db: &Database, msg: &Msg) -> impl Iterator<Item = TitleResp> {
149    // return an empty iterator for messages originating from a nick
150    // that is configured to be ignored
151    if param!(rtd, ignore_nicks).contains(&msg.sender.to_string()) {
152        return vec![].into_iter();
153    }
154
155    let mut responses: Vec<TitleResp> = vec![];
156
157    let mut num_processed = 0;
158    let mut dedup_urls = HashSet::new();
159
160    // look at each space-separated message token
161    for token in msg.text.split_whitespace() {
162        // the token must not contain unsafe characters
163        if contains_unsafe_chars(token) {
164            continue;
165        }
166
167        // get a full URL for tokens without a scheme
168        let maybe_token = if feat!(rtd, partial_urls) {
169            add_scheme_for_tld(token)
170        } else {
171            None
172        };
173
174        let token = maybe_token
175            .as_ref()
176            .map_or(token, String::as_str);
177
178        // the token must be a valid url
179        let url = match token.parse::<Url>() {
180            Ok(url) => url,
181            _ => continue,
182        };
183
184        // the scheme must be http or https
185        if !["http", "https"].contains(&url.scheme()) {
186            continue;
187        }
188
189        // skip duplicate urls within the message
190        if dedup_urls.contains(&url) {
191            continue;
192        }
193
194        info!("[{}] RESOLVE <{}>", rtd.conf.network.name, token);
195
196        // try to get the title from the url
197        let title = if let Some(title) = process_plugins(rtd, &url) {
198            title
199        } else {
200            match resolve_url(token, rtd) {
201                Ok(title) => title,
202                Err(err) => {
203                    error!("{:?}", err);
204                    responses.push(TitleResp::Error(err.to_string()));
205                    continue;
206                },
207            }
208        };
209
210        // create a log entry struct
211        let entry = NewLogEntry {
212            title: &title,
213            url: token,
214            user: msg.sender,
215            channel: msg.target,
216        };
217
218        // check for pre-post
219        let pre_post = if feat!(rtd, history) {
220            db.check_prepost(token)
221        } else {
222            Ok(None)
223        };
224
225        let pre_post_found = matches!(pre_post, Ok(Some(_)));
226
227        // limit pre-post to same channel if required by configuration
228        let pre_post = if feat!(rtd, cross_channel_history) {
229            pre_post
230        } else {
231            pre_post
232                .map(|p| p.and_then(|p| {
233                    if p.channel == msg.target {
234                        Some(p)
235                    } else {
236                        None
237                    }
238                }))
239        };
240
241        // generate response string
242        let mut msg = match pre_post {
243            Ok(Some(previous_post)) => {
244                let user = if feat!(rtd, mask_highlights) {
245                    create_non_highlighting_name(&previous_post.user)
246                } else {
247                    previous_post.user
248                };
249                format!("⤷ {} → {} {} ({})",
250                    title,
251                    previous_post.time_created,
252                    user,
253                    previous_post.channel
254                )
255            },
256            Ok(None) => {
257                // add new log entry to database, if posted in a channel
258                if feat!(rtd, history) && !pre_post_found && msg.is_chanmsg {
259                    if let Err(err) = db.add_log(&entry) {
260                        error!("SQL error: {}", err);
261                    }
262                }
263                format!("⤷ {}", title)
264            },
265            Err(err) => {
266                error!("SQL error: {}", err);
267                continue
268            },
269        };
270
271        // limit response length, see RFC1459
272        msg = utf8_truncate(&msg, 510);
273
274        info!("[{}] {}", rtd.conf.network.name, msg);
275
276        responses.push(TitleResp::Title(msg.to_string()));
277
278        dedup_urls.insert(url);
279
280        // limit the number of processed URLs
281        num_processed += 1;
282        if num_processed == param!(rtd, url_limit) {
283            break;
284        }
285    };
286
287    responses.into_iter()
288}
289
290/// send IRC response
291fn respond<S>(client: &IrcClient, rtd: &Rtd, msg: &Msg, text: S)
292where
293    S: ToString + std::fmt::Display,
294{
295    let result = if feat!(rtd, send_notice) && msg.is_chanmsg {
296        client.send_notice(&msg.target, &text)
297    } else {
298        client.send_privmsg(&msg.target, &text)
299    };
300
301    result.unwrap_or_else(|err| {
302        error!("Error sending response {}: {}", msg.target, err);
303    });
304}
305
306fn respond_error<S>(client: &IrcClient, rtd: &Rtd, msg: &Msg, text: S)
307where
308    S: ToString + std::fmt::Display,
309{
310    // reply with error, if message was sent in a channel
311    // always reply with errors in queries
312    if !msg.is_chanmsg || feat!(rtd, reply_with_errors) {
313        respond(client, rtd, msg, &text);
314    };
315
316    // send errors to poster by query
317    // do not send if link was already sent in a query, since this
318    // duplicates messages
319    if msg.is_chanmsg && feat!(rtd, send_errors_to_poster) {
320        client.send_privmsg(&msg.sender, &text).unwrap();
321    };
322
323    // send error messages to status channels, for channel messages only
324    // this may still leak link information from, e.g. secret channels
325    if msg.is_chanmsg {
326        msg_status_chans(client, rtd, &text);
327    }
328}
329
330fn contains_urls(text: &str) -> bool {
331    text
332        .split_whitespace()
333        .filter(|token| token.parse::<Url>().is_ok())
334        .count() > 0
335}
336
// regex for unsafe characters, as defined in RFC 1738
// a single character class matching any one of: { } | \ ^ ~ [ ] ` < > "
const RE_UNSAFE_CHARS: &str = r#"[{}|\\^~\[\]`<>"]"#;
339
340/// does the token contain characters not permitted by RFC 1738
341fn contains_unsafe_chars(token: &str) -> bool {
342    lazy_static! {
343        static ref UNSAFE: Regex = Regex::new(RE_UNSAFE_CHARS).unwrap();
344    }
345    UNSAFE.is_match(token)
346}
347
348/// does a message look like it contains a ping
349fn is_ping(nick: &str, message: &str) -> bool {
350    let regex = format!(r#"\b{}\b"#, nick);
351    let ping = Regex::new(&regex).unwrap();
352    ping.is_match(message)
353}
354
355/// create a name that doesn't trigger highlight regexes
356fn create_non_highlighting_name(name: &str) -> String {
357    let mut graphemes = name.graphemes(true);
358    let first = graphemes.next();
359
360    first
361        .into_iter()
362        .chain(iter::once("\u{200C}"))
363        .chain(graphemes)
364        .collect()
365}
366
/// Return the longest prefix of `s` that is at most `n` bytes long and ends
/// on a character boundary, so no multi-byte UTF-8 character is split.
fn utf8_truncate(s: &str, n: usize) -> String {
    // byte offset just past the last character that still fits in `n` bytes
    let cut = s
        .char_indices()
        .map(|(start, c)| start + c.len_utf8())
        .take_while(|&end| end <= n)
        .last()
        .unwrap_or(0);

    s[..cut].to_string()
}
374
lazy_static! {
    // matches a run of two or more consecutive full-stops
    static ref REPEATED_DOTS: Regex = Regex::new(r"\.\.+").unwrap();
}
378
379/// if a token has a recognised TLD, but no scheme, add one
380pub fn add_scheme_for_tld(token: &str) -> Option<String> {
381    if token.parse::<Url>().is_err() {
382        if token.starts_with(|s: char| !s.is_alphabetic()) {
383            return None;
384        }
385
386        if REPEATED_DOTS.is_match(token) {
387            return None;
388        }
389
390        let new_token = format!("http://{}", token);
391
392        if let Ok(url) = new_token.parse::<Url>() {
393            if !url.domain()?.contains('.') {
394                return None;
395            }
396
397            // reject email addresses
398            if url.username() != "" {
399                return None;
400            }
401
402            let tld = url.domain()?
403                .split('.')
404                .last()?;
405
406            if TLD.contains(tld) {
407                return Some(new_token);
408            }
409        }
410    }
411
412    None
413}
414
415/// join any status channels not already joined and send a message to them
416pub fn msg_status_chans<S>(client: &IrcClient, rtd: &Rtd, msg: S)
417where
418    S: ToString + std::fmt::Display,
419{
420    if param!(rtd, status_channels).is_empty() {
421        return;
422    };
423
424    param!(rtd, status_channels)
425        .iter()
426        .for_each(|c| client.send_join(c).unwrap_or_else(|err| {
427            error!("Error joining status channel {}: {}", c, err)
428        }));
429
430    param!(rtd, status_channels)
431        .iter()
432        .for_each(|c| client.send_privmsg(c, &msg).unwrap());
433}
434
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::Once;
    use std::thread;
    use std::time::Duration;
    use tiny_http::Response;
    use super::TitleResp::{Title, Error};

    // guards the one-time start of the local HTTP fixture
    static SERVE_HTML: Once = Once::new();

    /// Start the local HTTP fixture on 127.0.0.1:28382.
    ///
    /// Safe to call from every test: the server is started only once per
    /// test process, and concurrent callers wait until start-up (including
    /// the settling delay) has finished. Each test that talks to the fixture
    /// calls this itself, so tests do not depend on execution order.
    fn serve_html() {
        SERVE_HTML.call_once(|| {
            let _ = thread::spawn(move || {
                let srv = tiny_http::Server::http("127.0.0.1:28382").unwrap();
                loop {
                    let rq = srv.recv().unwrap();
                    let resp = match rq.url() {
                        "/empty" => Response::from_string(""),
                        "/blank" => Response::from_string("<title></title>"),
                        _ => Response::from_string("<title>|t|</title>"),
                    };
                    rq.respond(resp).unwrap();
                }
            });
            // give the server thread time to bind before the first request
            thread::sleep(Duration::from_millis(100));
        });
    }

    /// process a message with a default configuration
    fn pt(m: &str) -> Vec<TitleResp> {
        let rtd = Rtd::new().init_http_client().unwrap();
        pt_with_rtd(m, &rtd)
    }

    /// process a message sent by "testnick" to "#testchannel"
    fn pt_with_rtd(m: &str, rtd: &Rtd) -> Vec<TitleResp> {
        let msg = Msg::new(rtd, "testnick", "#testchannel", m);
        let db = Database::open_in_memory().unwrap();
        let ret = process_titles(rtd, &db, &msg).collect();
        println!("message: \"{}\"", m);
        println!("{:?}", ret);
        ret
    }

    /// process a message containing `n` distinct fixture URLs
    fn pt_n(n: usize) -> Vec<TitleResp> {
        let mut c = 0;
        let m = iter::repeat("http://127.0.0.1:28382/")
            .take(n)
            .map(|t| {c += 1; format!("{}{}", t, c)})
            .collect::<Vec<String>>()
            .join(" ");
        pt(&m)
    }

    #[test]
    fn test_process_titles_count() {
        serve_html();
        assert_eq!(0, pt("").len());
        assert_eq!(1, pt("http://127.0.0.1:28382/").len());
        assert_eq!(2, pt("http://127.0.0.1:28382/1 http://127.0.0.1:28382/2").len());
        assert_eq!(4, pt_n(4).len());
        assert_eq!(8, pt_n(8).len());
    }

    #[test]
    fn test_process_titles_deduplicate() {
        serve_html();
        assert_eq!(1, pt("http://127.0.0.1:28382 http://127.0.0.1:28382").len());
        let m = iter::repeat("http://127.0.0.1:28382/")
            .take(10)
            .collect::<Vec<&str>>()
            .join(" ");
        assert_eq!(1, pt(&m).len());
    }

    #[test]
    fn test_process_titles_limit() {
        serve_html();
        // default limit is 10
        assert_eq!(10, pt_n(10).len());
        assert_eq!(10, pt_n(11).len());
        assert_eq!(10, pt_n(16).len());
        assert_eq!(10, pt_n(32).len());
    }

    #[test]
    fn test_process_titles_value() {
        serve_html();
        pt("http://127.0.0.1:28382/")
            .iter()
            .for_each(|v| assert_eq!(&Title("⤷ |t|".to_string()), v));
    }

    #[test]
    fn test_process_titles_repost() {
        serve_html();

        let mut rtd = Rtd::new().init_http_client().unwrap();
        feat!(rtd, history) = true;
        feat!(rtd, cross_channel_history) = false;

        let msg = Msg::new(&rtd, "testnick", "#test", "http://127.0.0.1:28382/");
        let db = Database::open_in_memory().unwrap();

        let d = r#"( [[:alpha:]]{3}){2} \d{1,2} \d{2}:\d{2}:\d{2} \d{4}"#;
        let date = Regex::new(d).unwrap();

        // no pre-post
        let res: Vec<_> = process_titles(&rtd, &db, &msg).collect();
        assert_eq!(1, res.len());
        assert!(matches!(res[0], Title(_)));

        res.iter()
            .for_each(|v| assert_eq!(Title("⤷ |t|".to_string()), *v));

        // pre-post
        let res: Vec<_> = process_titles(&rtd, &db, &msg).collect();
        assert_eq!(1, res.len());
        assert!(matches!(res[0], Title(_)));

        res.iter()
            .for_each(|v| {
                println!("{:?}", v);
                if let Title(s) = v {
                    assert!(s.starts_with("⤷ |t| → "));
                    assert!(date.is_match(s));
                    assert!(s.ends_with(" testnick (#test)"));
                }
            });

        // pre-post with masked highlights enabled
        feat!(rtd, mask_highlights) = true;

        let res: Vec<_> = process_titles(&rtd, &db, &msg).collect();
        assert_eq!(1, res.len());
        assert!(matches!(res[0], Title(_)));

        res.iter()
            .for_each(|v| {
                println!("{:?}", v);
                if let Title(s) = v {
                    assert!(s.starts_with("⤷ |t| → "));
                    assert!(date.is_match(s));
                    assert!(s.ends_with(" t\u{200c}estnick (#test)"));
                }
            });

        feat!(rtd, mask_highlights) = false;

        let msg2 = Msg::new(&rtd, "testnick", "#test2", "http://127.0.0.1:28382/");

        // cross-posted history is disabled
        let res: Vec<_> = process_titles(&rtd, &db, &msg2).collect();
        assert_eq!(1, res.len());
        assert!(matches!(res[0], Title(_)));

        res.iter()
            .for_each(|v| assert_eq!(Title("⤷ |t|".to_string()), *v));

        // cross-posted history is enabled
        feat!(rtd, cross_channel_history) = true;

        let res: Vec<_> = process_titles(&rtd, &db, &msg2).collect();
        assert_eq!(1, res.len());
        assert!(matches!(res[0], Title(_)));

        res.iter()
            .for_each(|v| {
                println!("{:?}", v);
                if let Title(s) = v {
                    assert!(s.starts_with("⤷ |t| → "));
                    assert!(date.is_match(s));
                    assert!(s.ends_with(" testnick (#test)"));
                }
            });
    }

    #[test]
    fn test_process_titles_http_https_only() {
        assert_eq!(0, pt("git://127.0.0.1:28382/").len());
        assert_eq!(0, pt("ssh://127.0.0.1:28382/").len());
        assert_eq!(0, pt("ftp://127.0.0.1:28382/").len());
    }

    #[test]
    fn test_process_titles_unsafe_chars() {
        assert_eq!(0, pt("http://127.0.0.1:28382/{}").len());
    }

    /// is the response an error carrying exactly the text `s`
    fn err_val(r: &TitleResp, s: &str) -> bool {
        if let Error(st) = r {
            st == s
        } else { false }
    }

    #[test]
    fn test_process_titles_resolve_error() {
        serve_html();
        assert!(err_val(&pt("http://127.0.0.1:28382/empty")[0],
            "http://127.0.0.1:28382/empty: failed to parse title"));
        assert!(err_val(&pt("http://127.0.0.1:28382/blank")[0],
            "http://127.0.0.1:28382/blank: failed to parse title"));
    }

    // ignored by default: resolves real hosts on the internet
    #[test]
    #[ignore]
    fn test_process_titles_partial() {
        let mut rtd = Rtd::new().init_http_client().unwrap();
        feat!(rtd, partial_urls) = true;

        let db = Database::open_in_memory().unwrap();

        let msg = Msg::new(&rtd, "testnick", "#test", "google.com");
        println!("{:?}", msg);
        assert_eq!(1, process_titles(&rtd, &db, &msg).count());

        let msg = Msg::new(&rtd, "testnick", "#test", "docs.rs");
        println!("{:?}", msg);
        assert_eq!(1, process_titles(&rtd, &db, &msg).count());
    }

    #[test]
    fn test_process_titles_ignored_nicks() {
        serve_html();

        let mut rtd = Rtd::new().init_http_client().unwrap();
        // no ignores
        param!(rtd, ignore_nicks) = vec![];
        assert_eq!(1, pt_with_rtd("http://127.0.0.1:28382/", &rtd).len());
        // ignore "testnick" nick
        param!(rtd, ignore_nicks) = vec!["testnick".to_string()];
        assert!(pt_with_rtd("http://127.0.0.1:28382/", &rtd).is_empty());
        param!(rtd, ignore_nicks) = vec![
            "foo".to_string(),
            "testnick".to_string(),
            "bar".to_string(),
            "baz".to_string(),
            "qux".to_string()
        ];
        assert!(pt_with_rtd("http://127.0.0.1:28382/", &rtd).is_empty());
    }

    #[test]
    fn test_is_ping() {
        assert_eq!(is_ping("a", "a"), true);
        assert_eq!(is_ping("a", "a ^"), true);
        assert_eq!(is_ping("a", "a:"), true);
        assert_eq!(is_ping("a", "a: hi"), true);
        assert_eq!(is_ping("a", "a hi"), true);
        assert_eq!(is_ping("a", "a,"), true);
        assert_eq!(is_ping("a", "a, hi"), true);
        assert_eq!(is_ping("a", "b: a:"), true);
        assert_eq!(is_ping("a", "b, a:"), true);
        assert_eq!(is_ping("a", "b,a:"), true);
        assert_eq!(is_ping("a", "b,a"), true);
        assert_eq!(is_ping("a", "a,b:"), true);
        assert_eq!(is_ping("a", "a,b"), true);
        assert_eq!(is_ping("b", "also, b:"), true);
        assert_eq!(is_ping("b", "also, b: hi"), true);
        assert_eq!(is_ping("a", "words words words a"), true);
        assert_eq!(is_ping("a", "hi, a"), true);
        assert_eq!(is_ping("a", "hi a"), true);
        assert_eq!(is_ping("a", "@a"), true);
        assert_eq!(is_ping("a", "@a:"), true);
        assert_eq!(is_ping("a", "@a: hi"), true);
        assert_eq!(is_ping("a", "@a, hi"), true);
        assert_eq!(is_ping("a", "@a hi"), true);
        assert_eq!(is_ping("a", "...a"), true);
        assert_eq!(is_ping("a", "a... hi"), true);
        assert_eq!(is_ping("a", "b/a:"), true);
        assert_eq!(is_ping("a", "a/b:"), true);
        assert_eq!(is_ping("a", " a:"), true);
    }

    #[test]
    fn test_is_ping_no_partial_nick() {
        assert_eq!(is_ping("a", "abc"), false);
        assert_eq!(is_ping("a", "bac"), false);
        assert_eq!(is_ping("a", "bca"), false);
        assert_eq!(is_ping("a", "abc bac bca"), false);
        assert_eq!(is_ping("a", "lemonades are happy at car parks"), false);
    }

    #[test]
    fn test_utf8_truncate() {
        assert_eq!("", utf8_truncate("", 10));
        assert_eq!("", utf8_truncate("", 1));
        assert_eq!(" ", utf8_truncate("  ", 1));
        assert_eq!("\u{2665}", utf8_truncate("\u{2665}", 4));
        assert_eq!("\u{2665}", utf8_truncate("\u{2665}", 3));
        assert_eq!("", utf8_truncate("\u{2665}", 2));
        assert_eq!("\u{0306}\u{0306}", utf8_truncate("\u{0306}\u{0306}", 4));
        assert_eq!("\u{0306}", utf8_truncate("\u{0306}\u{0306}", 2));
        assert_eq!("\u{0306}", utf8_truncate("\u{0306}", 2));
        assert_eq!("", utf8_truncate("\u{0306}", 1));
        assert_eq!("hello ", utf8_truncate("hello \u{1F603} world!", 9));
    }

    #[test]
    fn test_create_non_highlighting_name() {
        assert_eq!("\u{200C}", create_non_highlighting_name(""));
        assert_eq!("f\u{200C}oo", create_non_highlighting_name("foo"));
        assert_eq!("b\u{200C}ar", create_non_highlighting_name("bar"));
        assert_eq!("b\u{200C}az", create_non_highlighting_name("baz"));
    }

    #[test]
    fn test_contains_unsafe_chars() {
        for c in &['{', '}', '|', '\\', '^', '~', '[', ']', '`', '<', '>', '"']
        {
            assert!(contains_unsafe_chars(&format!("http://z/{}", c)));
        }
        assert_eq!(contains_unsafe_chars("http://z.zzz/"), false);
    }

    #[test]
    fn test_add_scheme_for_tld() {
        // appears to be a URL, and has a valid TLD
        assert!(add_scheme_for_tld("docs.rs").is_some());
        assert!(add_scheme_for_tld("nomnomnom.xyz").is_some());
        assert!(add_scheme_for_tld("endless.horse").is_some());
        assert!(add_scheme_for_tld("google.co.uk").is_some());
        assert!(add_scheme_for_tld("notreal.co.uk/#banana").is_some());
        assert!(add_scheme_for_tld("notreal.co.uk/?banana=3").is_some());

        // return value is as expected
        assert_eq!(
            Some(String::from("http://nomnomnom.xyz")),
            add_scheme_for_tld("nomnomnom.xyz")
        );
        assert_eq!(
            Some(String::from("http://google.co.uk")),
            add_scheme_for_tld("google.co.uk")
        );

        // already a valid URL
        assert!(add_scheme_for_tld("http://nomnomnom.xyz").is_none());
        assert!(add_scheme_for_tld("http://endless.horse").is_none());

        // not a recognised TLD
        assert!(add_scheme_for_tld("abc.cheese").is_none());
        assert!(add_scheme_for_tld("abc.limes").is_none());

        // recognised TLD, but incomplete as a URL
        assert!(add_scheme_for_tld("xyz").is_none());
        assert!(add_scheme_for_tld("uk").is_none());
        assert!(add_scheme_for_tld("horse").is_none());

        // don't resolve email addresses
        assert!(add_scheme_for_tld("test@gmail.com").is_none());
        assert!(add_scheme_for_tld("word.word@gmail.com").is_none());

        // don't resolve tokens beginning with @
        assert!(add_scheme_for_tld("@gmail.com").is_none());
        assert!(add_scheme_for_tld("@endless.horse").is_none());

        // don't resolve tokens beginning with '.'
        assert!(add_scheme_for_tld(".net").is_none());
        assert!(add_scheme_for_tld(".zip").is_none());
        assert!(add_scheme_for_tld("...cool").is_none());

        // don't resolve tokens containing repeated full-stops
        assert_eq!(None, add_scheme_for_tld("wow...cool"));
        assert_eq!(None, add_scheme_for_tld("something..wow"));
        assert_eq!(None, add_scheme_for_tld("something.....boo"));
    }
}
787}