tldextract 0.6.0

Extract domain information from a URL.
Documentation
extern crate tldextract;
use tldextract::TldOption;
use tldextract::TldResult;

/// `https://www.baidu.com` is expected to extract as `("www", "baidu", "com")`.
#[test]
fn baidu() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("https://www.baidu.com").unwrap();
    let expected = TldResult::new("www", "baidu", "com");
    assert_eq!(actual, expected);
}
/// A host containing a non-ASCII label (`水管`) is expected to come back with that
/// label unchanged as the middle component.
#[test]
fn shuiguan() {
    const URL: &str = "http://www.水管.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("www", "水管", "com"));
}
/// A two-label host (`google.fr`) is expected to extract with `None` as the first
/// component.
#[test]
fn google() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("https://google.fr").unwrap();
    let expected = TldResult::new(None, "google", "fr");
    assert_eq!(actual, expected);
}
/// `https://m.facebook.com` is expected to extract as `("m", "facebook", "com")`.
#[test]
fn facebook() {
    const URL: &str = "https://m.facebook.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("m", "facebook", "com"));
}
/// The two-label ending `edu.cn` is expected to be reported as a single last
/// component: `("www", "uestc", "edu.cn")`.
#[test]
fn uestc() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("http://www.uestc.edu.cn").unwrap();
    let expected = TldResult::new("www", "uestc", "edu.cn");
    assert_eq!(actual, expected);
}
/// `forums.bbc.co.uk` (with a trailing slash in the URL) is expected to extract as
/// `("forums", "bbc", "co.uk")`.
#[test]
fn bbc_uk() {
    const URL: &str = "http://forums.bbc.co.uk/";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("forums", "bbc", "co.uk"));
}
/// Two leading labels are expected to be kept together, dot-joined, in the first
/// component: `("forums.news", "cnn", "com")`.
#[test]
fn cnn() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("http://forums.news.cnn.com/").unwrap();
    let expected = TldResult::new("forums.news", "cnn", "com");
    assert_eq!(actual, expected);
}
/// `www.worldbank.org.kg` is expected to extract as `("www", "worldbank", "org.kg")`.
#[test]
fn worldbank() {
    const URL: &str = "http://www.worldbank.org.kg/";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("www", "worldbank", "org.kg"));
}
/// An IPv4 host with a port and a path is expected to be returned whole as the
/// middle component, with `None` on either side.
#[test]
fn localhost_ip() {
    let extractor = TldOption::default().build();
    let actual = extractor
        .extract("http://127.0.0.1:8080/deployed/")
        .unwrap();
    let expected = TldResult::new(None, "127.0.0.1", None);
    assert_eq!(actual, expected);
}

/// `http://www.google.com` is expected to extract as `("www", "google", "com")`.
#[test]
fn american() {
    const URL: &str = "http://www.google.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("www", "google", "com"));
}

/// `http://www.theregister.co.uk` is expected to extract as
/// `("www", "theregister", "co.uk")`.
#[test]
fn british() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("http://www.theregister.co.uk").unwrap();
    let expected = TldResult::new("www", "theregister", "co.uk");
    assert_eq!(actual, expected);
}

/// A host with no subdomain (`gmail.com`) is expected to extract with `None` as the
/// first component.
#[test]
fn no_subdomain() {
    const URL: &str = "http://gmail.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new(None, "gmail", "com"));
}

/// A two-level subdomain is expected to be reported dot-joined:
/// `("media.forums", "theregister", "co.uk")`.
#[test]
fn nested_subdomain() {
    let extractor = TldOption::default().build();
    let actual = extractor
        .extract("http://media.forums.theregister.co.uk")
        .unwrap();
    let expected = TldResult::new("media.forums", "theregister", "co.uk");
    assert_eq!(actual, expected);
}

/// The label `www` can itself be the domain: `www.www.com` is expected to give
/// `("www", "www", "com")` and `www.com` to give `(None, "www", "com")`.
#[test]
fn odd_but_possible() {
    let extractor = TldOption::default().build();

    let doubled = extractor.extract("http://www.www.com").unwrap();
    assert_eq!(doubled, TldResult::new("www", "www", "com"));

    let bare = extractor.extract("http://www.com").unwrap();
    assert_eq!(bare, TldResult::new(None, "www", "com"));
}

/// Hosts whose last label is not expected to be recognised as a suffix: the last
/// component of the result is `None` in both cases.
#[test]
fn local_host() {
    let extractor = TldOption::default().build();

    // Single-label host: the whole name lands in the middle component.
    let single_label = extractor
        .extract("http://internalunlikelyhostname/")
        .unwrap();
    assert_eq!(
        single_label,
        TldResult::new(None, "internalunlikelyhostname", None)
    );

    // Two labels: the final label becomes the middle component and the one
    // before it the first component.
    let two_labels = extractor
        .extract("http://internalunlikelyhostname.bizarre")
        .unwrap();
    assert_eq!(
        two_labels,
        TldResult::new("internalunlikelyhostname", "bizarre", None)
    );
}

/// Contrasts an ending expected to be reported as the last component (`info`)
/// with a longer look-alike (`information`) for which the last component is
/// expected to be `None`.
#[test]
fn qualified_local_host() {
    let extractor = TldOption::default().build();

    let with_info = extractor
        .extract("http://internalunlikelyhostname.info/")
        .unwrap();
    assert_eq!(
        with_info,
        TldResult::new(None, "internalunlikelyhostname", "info")
    );

    let with_information = extractor
        .extract("http://internalunlikelyhostname.information/")
        .unwrap();
    assert_eq!(
        with_information,
        TldResult::new("internalunlikelyhostname", "information", None)
    );
}

/// A dotted-quad host is expected to be returned whole as the middle component,
/// while a host that merely starts with numeric labels is split like any other
/// name.
#[test]
fn ip() {
    let extractor = TldOption::default().build();

    let dotted_quad = extractor.extract("http://216.22.0.192/").unwrap();
    assert_eq!(dotted_quad, TldResult::new(None, "216.22.0.192", None));

    let numeric_prefix = extractor.extract("http://216.22.project.coop/").unwrap();
    assert_eq!(numeric_prefix, TldResult::new("216.22", "project", "coop"));
}

/// Punycode labels (`xn--…`) are expected to come back in their Unicode form:
/// `(None, "россия", "рф")`.
#[test]
fn punycode() {
    const URL: &str = "http://xn--h1alffa9f.xn--p1ai";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new(None, "россия", "рф"));
}

/// Expects a long punycode subdomain under `google.com` to be returned decoded to
/// Unicode.
///
/// NOTE(review): `invalid_punycode` in this file calls `unwrap_err()` on this very
/// same URL with the same default extractor, so the two tests appear to contradict
/// each other — confirm which expectation is the intended one.
#[test]
fn punycode2() {
    const URL: &str = "http://xn--tub-1m9d15sfkkhsifsbqygyujjrw602gk4li5qqk98aca0w.google.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(
        actual,
        TldResult::new("亲,您好,异常订单退款链接:tub", "google", "com")
    );
}

/// Each URL below carries a punycode subdomain for which `extract` is expected to
/// return an `Err`.
///
/// The earlier comments in this test described Python `UnicodeError`s ("IDNA does
/// not round-trip" and "incomplete punycode string"); this test itself only
/// checks that an error is returned, not which one.
///
/// NOTE(review): `punycode2` in this file calls `unwrap()` on the first URL and
/// expects a decoded result, so the two tests appear to contradict each other —
/// confirm which expectation is the intended one.
#[test]
fn invalid_punycode() {
    let extractor = TldOption::default().build();

    // Previously annotated as: 'IDNA does not round-trip'.
    let no_round_trip =
        "http://xn--tub-1m9d15sfkkhsifsbqygyujjrw602gk4li5qqk98aca0w.google.com";
    // Previously annotated as: 'incomplete punycode string'.
    let incomplete = "http://xn--tub-1m9d15sfkkhsifsbqygyujjrw60.google.com";

    for &url in [no_round_trip, incomplete].iter() {
        extractor.extract(url).unwrap_err();
    }
}

/// A host with punycode labels in both its leading and trailing positions is
/// expected to make `extract` return an `Err`.
#[test]
fn invalid_puny_with_puny() {
    const URL: &str = "http://xn--zckzap6140b352by.blog.so-net.xn--wcvs22d.hk";

    let extractor = TldOption::default().build();
    let outcome = extractor.extract(URL);
    outcome.unwrap_err();
}

/// A host mixing a punycode label with a raw Unicode label (`教育`) is expected to
/// make `extract` return an `Err`.
#[test]
fn puny_with_non_puny() {
    const URL: &str = "http://xn--zckzap6140b352by.blog.so-net.教育.hk";

    let extractor = TldOption::default().build();
    let outcome = extractor.extract(URL);
    outcome.unwrap_err();
}

/// The punycode label `xn--gieen46ers-73a` is expected to be returned as
/// `gießen46ers`, i.e. with the `ß` preserved.
#[test]
fn idna_2008() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("http://xn--gieen46ers-73a.de").unwrap();
    let expected = TldResult::new(None, "gießen46ers", "de");
    assert_eq!(actual, expected);
}

/// The same host is expected to extract identically under different URL schemes
/// (`https` and `ssh`).
#[test]
fn scheme() {
    let extractor = TldOption::default().build();
    let urls = [
        "https://mail.google.com/mail",
        "ssh://mail.google.com/mail",
    ];

    for &url in urls.iter() {
        let actual = extractor.extract(url).unwrap();
        assert_eq!(actual, TldResult::new("mail", "google", "com"));
    }
}

/// An explicit port (`:8443`) is expected not to show up in any component of the
/// result.
#[test]
fn port() {
    const URL: &str = "git+ssh://www.github.com:8443/";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("www", "github", "com"));
}

/// Credentials before the `@` and the port after the host are expected to be
/// ignored: only `1337.warez.com` contributes to the result.
#[test]
fn username() {
    let extractor = TldOption::default().build();
    let actual = extractor
        .extract("ftp://johndoe:5cr1p7k1dd13@1337.warez.com:2501")
        .unwrap();
    let expected = TldResult::new("1337", "warez", "com");
    assert_eq!(actual, expected);
}

/// Query strings and fragments, with or without a path in front of them, are
/// expected not to affect the result: every URL below should give
/// `(None, "google", "com")`.
#[test]
fn query_fragment() {
    let extractor = TldOption::default().build();
    let urls = [
        "http://google.com?q=cats",
        "http://google.com#Welcome",
        "http://google.com/#Welcome",
        "http://google.com/s#Welcome",
        "http://google.com/s?q=cats#Welcome",
    ];

    for &url in urls.iter() {
        let actual = extractor.extract(url).unwrap();
        assert_eq!(actual, TldResult::new(None, "google", "com"));
    }
}

/// Both the short ending `uk` and the longer ending `co.uk` are expected to be
/// reported as the last component, depending on which one the host actually has.
#[test]
fn regex_order() {
    let extractor = TldOption::default().build();

    let short_ending = extractor.extract("http://www.parliament.uk").unwrap();
    assert_eq!(short_ending, TldResult::new("www", "parliament", "uk"));

    let long_ending = extractor.extract("http://www.parliament.co.uk").unwrap();
    assert_eq!(long_ending, TldResult::new("www", "parliament", "co.uk"));
}

/// Multi-label `.au` endings are expected to be reported whole as the last
/// component: `act.edu.au` and `com.au`.
#[test]
fn unhandled_by_iana() {
    let extractor = TldOption::default().build();

    let three_label_ending = extractor.extract("http://www.cgs.act.edu.au/").unwrap();
    assert_eq!(
        three_label_ending,
        TldResult::new("www", "cgs", "act.edu.au")
    );

    let two_label_ending = extractor.extract("http://www.google.com.au/").unwrap();
    assert_eq!(two_label_ending, TldResult::new("www", "google", "com.au"));
}

/// `www.metp.net.cn` is expected to extract as `("www", "metp", "net.cn")`.
#[test]
fn ld_is_a_website_too() {
    const URL: &str = "http://www.metp.net.cn";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("www", "metp", "net.cn"));

    // Disabled case, kept for reference. The author's note said this is
    // unhandled by the PSL, followed by "Or is it?", so the expectation was
    // left unverified:
    //
    // assert_eq!(ext.extract("http://www.net.cn").unwrap(),
    //            TldResult::new("www", "net", "cn"));
}

/// A trailing dot after the host (`www.example.com.`) is expected not to appear
/// in the result: `("www", "example", "com")`.
#[test]
fn dns_root_label() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("http://www.example.com./").unwrap();
    let expected = TldResult::new("www", "example", "com");
    assert_eq!(actual, expected);
}

/// With the default options, `waiterrant.blogspot.com` is expected to extract as
/// `("waiterrant", "blogspot", "com")`, i.e. `blogspot` is the middle component
/// rather than part of the last one.
#[test]
fn private_domains() {
    const URL: &str = "http://waiterrant.blogspot.com";

    let extractor = TldOption::default().build();
    let actual = extractor.extract(URL).unwrap();
    assert_eq!(actual, TldResult::new("waiterrant", "blogspot", "com"));
}

/// When the entire host is a suffix (`es.gov.br`), the first two components are
/// expected to be `None` and the whole host to be the last component.
#[test]
fn whole_url_is_a_suffix() {
    let extractor = TldOption::default().build();
    let actual = extractor.extract("https://es.gov.br").unwrap();
    let expected = TldResult::new(None, None, "es.gov.br");
    assert_eq!(actual, expected);
}