use std::collections::HashMap;
use crate::{SchemeState, UrlLocation, UrlLocator};
/// Walks the scheme state machine through the mixed-case input `htTpS:` and
/// checks the state reported after every single character.
#[test]
fn advance_schemes() {
    // Each entry is the character fed in and the state expected afterwards.
    let transitions = [
        ('h', SchemeState::H),
        ('t', SchemeState::HT),
        ('T', SchemeState::HTT),
        ('p', SchemeState::HTTP),
        ('S', SchemeState::HTTPS),
        (':', SchemeState::COMPLETE),
    ];

    let mut current = SchemeState::NONE;
    assert_eq!(current, SchemeState::NONE);

    for (input, expected) in transitions.iter() {
        current = current.advance(*input);
        assert_eq!(&current, expected);
    }
}
/// Checks the reported URL length and the amount of text surrounding the URL
/// when it sits in the middle, at the end, or at the start of the input.
#[test]
fn boundaries() {
    assert_eq!(max_len("before https://example.org after"), Some(19));

    // (input, (characters before the URL, characters after the URL))
    let cases = [
        ("before https://example.org after", (7, 6)),
        ("before https://example.org", (7, 0)),
        ("https://example.org after", (0, 6)),
        ("https://example.org/test'ing;", (0, 5)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(position(input), expected);
    }
}
/// Checks where a URL is cut off by the characters that follow it.
///
/// Despite the test's name, every input begins with the URL; the cases vary
/// the character appearing after `https://example.org/test` (control
/// characters, tab, space, `?`) and trailing punctuation.
#[test]
fn start() {
    let cases = [
        ("https://example.org/test\u{00}ing", Some(24)),
        ("https://example.org/test\u{1F}ing", Some(24)),
        ("https://example.org/test\u{7F}ing", Some(24)),
        ("https://example.org/test\u{9F}ing", Some(24)),
        ("https://example.org/test\ting", Some(24)),
        ("https://example.org/test ing", Some(24)),
        ("https://example.org/test?ing", Some(28)),
        ("https://example.org/.,;:(!?", Some(20)),
        ("https://example.org/", Some(20)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks that text placed directly in front of a URL is not counted as part
/// of it.
///
/// Despite the test's name, most cases vary what precedes the URL; the
/// `sub.example.org` case shows that a dot inside the host is kept.
#[test]
fn end() {
    let cases = [
        ("complicated:https://example.org", Some(19)),
        ("\u{2502}https://example.org", Some(19)),
        ("test.https://example.org", Some(19)),
        ("https://sub.example.org", Some(23)),
        (",https://example.org", Some(19)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks URL lengths for punycode hosts and for non-ASCII characters in the
/// host and path. Expected lengths are counted in characters, not bytes.
#[test]
fn url_unicode() {
    let cases = [
        ("https://xn--example-2b07f.org", Some(29)),
        ("https://example.org/\u{2008A}", Some(21)),
        ("https://example.org/\u{f17c}", Some(21)),
        ("https://üñîçøðé.com/ä", Some(21)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks scheme recognition: an unknown scheme produces no URL at all, while
/// each listed scheme produces a URL spanning the whole input.
#[test]
fn url_schemes() {
    let cases = [
        ("invalidscheme://example.org", None),
        ("mailto://example.org", Some(20)),
        ("https://example.org", Some(19)),
        ("http://example.org", Some(18)),
        ("news://example.org", Some(18)),
        ("file://example.org", Some(18)),
        ("git://example.org", Some(17)),
        ("ssh://example.org", Some(17)),
        ("ftp://example.org", Some(17)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks how paired delimiters (parentheses, square brackets, quotes and
/// angle brackets) around and inside a URL affect the reported length.
#[test]
fn url_matching_chars() {
    let cases = [
        // Parentheses
        ("(https://example.org/test(ing)/?)", Some(30)),
        ("(https://example.org/test(ing))", Some(29)),
        ("https://example.org/test(ing)", Some(29)),
        ("((https://example.org))", Some(19)),
        (")https://example.org(", Some(19)),
        ("https://example.org)", Some(19)),
        ("https://example.org(", Some(19)),
        // Square brackets
        ("https://[2001:db8:a0b:12f0::1]:80", Some(33)),
        ("([(https://example.org/test(ing))])", Some(29)),
        ("https://example.org/]()", Some(20)),
        ("[https://example.org]", Some(19)),
        // Single quotes
        ("'https://example.org/test'ing''", Some(29)),
        ("https://example.org/test'ing'", Some(29)),
        ("'https://example.org'", Some(19)),
        ("https://example.org'", Some(19)),
        // Double quotes
        ("\"https://example.org\"", Some(19)),
        ("\"https://example.org", Some(19)),
        // Angle brackets
        ("⟨https://example.org⟩", Some(19)),
        ("⟩https://example.org⟨", Some(19)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks the locator output at the end of URLs embedded in markdown links,
/// both in the link target and in the link text.
#[test]
fn markdown() {
    // URL as the link target: the URL ends at index 25, `)` follows at 26.
    let expected: HashMap<usize, UrlLocation> =
        vec![(25, UrlLocation::Url(19, 0)), (26, UrlLocation::Reset)].into_iter().collect();
    exact_url_match("[test](https://example.org)", expected);

    // URL as the link text: the URL ends at index 19, `]` follows at 20.
    let expected: HashMap<usize, UrlLocation> =
        vec![(19, UrlLocation::Url(19, 0)), (20, UrlLocation::Reset)].into_iter().collect();
    exact_url_match("[https://example.org](test)", expected);

    // URLs in both positions.
    let expected: HashMap<usize, UrlLocation> = vec![
        (19, UrlLocation::Url(19, 0)),
        (20, UrlLocation::Reset),
        (47, UrlLocation::Url(26, 0)),
        (48, UrlLocation::Reset),
    ]
    .into_iter()
    .collect();
    exact_url_match("[https://example.org](https://example.org/longer)", expected);
}
/// Checks `file:` URLs with zero to three slashes after the scheme, plus a
/// path carrying a `:line:column` suffix; each spans the whole input.
#[test]
fn file() {
    let cases = [
        ("file:///test.rs:13:9", Some(20)),
        ("file:///test", Some(12)),
        ("file://test", Some(11)),
        ("file:/test", Some(10)),
        ("file:test", Some(9)),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(max_len(input), expected);
    }
}
/// Checks that two space-separated URLs in one input are reported
/// independently, with a reset on the separating space.
#[test]
fn multiple_urls() {
    let expected: HashMap<usize, UrlLocation> = vec![
        // Last character of the first URL, then the separating space.
        (18, UrlLocation::Url(19, 0)),
        (19, UrlLocation::Reset),
        // Last character of the second URL.
        (43, UrlLocation::Url(24, 0)),
    ]
    .into_iter()
    .collect();

    exact_url_match("https://example.org https://example.com/test", expected);
}
/// Checks the locator output at selected character indices while it moves
/// through leading whitespace, a scheme, a URL and trailing text.
///
/// The input starts with exactly three spaces, so the URL occupies character
/// indices 3 through 21. The expectations depend on that layout: `Url(7, 0)`
/// at index 9 is the 7-character prefix `https:/`, and `Url(19, 0)` at index
/// 21 is the complete 19-character URL.
#[test]
fn parser_states() {
    // Index layout:
    //   0..=2   leading spaces
    //   3..=21  "https://example.org" (':' at 8, first '/' at 9)
    //   22      space
    //   23..=26 "test"
    //   27      space
    //   28      ';'
    let input = "   https://example.org test ;";
    let mut result_map = HashMap::new();
    result_map.insert(0, UrlLocation::Reset);
    result_map.insert(3, UrlLocation::Scheme);
    result_map.insert(8, UrlLocation::Scheme);
    result_map.insert(9, UrlLocation::Url(7, 0));
    result_map.insert(21, UrlLocation::Url(19, 0));
    result_map.insert(22, UrlLocation::Reset);
    result_map.insert(24, UrlLocation::Reset);
    result_map.insert(25, UrlLocation::Scheme);
    result_map.insert(26, UrlLocation::Reset);
    exact_url_match(input, result_map);
}
/// Feeds `input` to a fresh `UrlLocator` one character at a time and compares
/// the locator's output against `result_map`.
///
/// `result_map` maps a character index (not a byte offset) to the location
/// expected right after that character was processed. Indices missing from
/// the map are not checked.
///
/// # Panics
///
/// Panics if the output at a listed index differs from the expectation, or if
/// the map lists an index past the end of `input`, since such an expectation
/// could never be checked and would otherwise pass silently.
fn exact_url_match(input: &str, result_map: HashMap<usize, UrlLocation>) {
    let mut locator = UrlLocator::new();
    let mut checked = 0;

    for (i, c) in input.chars().enumerate() {
        let result = locator.advance(c);

        if let Some(expected) = result_map.get(&i) {
            assert_eq!(&result, expected, "unexpected location at character index {}", i);
            checked += 1;
        }
    }

    // Map keys are unique, so every expectation was visited exactly when the
    // number of checks equals the number of entries.
    assert_eq!(
        checked,
        result_map.len(),
        "some expected indices lie beyond the end of the input"
    );
}
/// Runs `input` through a fresh `UrlLocator` and returns the length carried by
/// the last `UrlLocation::Url` it reported, or `None` if it never reported one.
fn max_len(input: &str) -> Option<u16> {
    let mut locator = UrlLocator::new();

    // `last` drains the whole iterator, so every character is still fed to the
    // locator in order; only the final URL report survives.
    input
        .chars()
        .filter_map(|c| match locator.advance(c) {
            UrlLocation::Url(len, _end_offset) => Some(len),
            _ => None,
        })
        .last()
}
/// Locates the last URL reported for `input` and returns how much text
/// surrounds it.
///
/// The result is `(before, after)`: the number of characters in front of the
/// URL and the number of characters behind it. Both are counted in characters,
/// matching the character indices used while scanning, so non-ASCII input is
/// measured consistently.
///
/// # Panics
///
/// Panics if the locator never reports a URL for `input`.
fn position(input: &str) -> (usize, usize) {
    let mut locator = UrlLocator::new();
    let mut url = None;

    for (i, c) in input.chars().enumerate() {
        if let UrlLocation::Url(len, end_offset) = locator.advance(c) {
            // `i + 1` is the number of characters consumed so far; removing
            // the end offset gives the exclusive end index of the URL.
            url = Some((i + 1 - end_offset as usize, len as usize));
        }
    }

    let (end, len) = url.expect("input does not contain a URL");

    // Use the character count rather than the byte length so the trailing
    // count uses the same unit as `end`.
    (end - len, input.chars().count() - end)
}
/// Benchmarks, only built for tests with the `nightly` feature enabled because
/// they rely on the unstable `test` crate.
#[cfg(all(test, feature = "nightly"))]
mod bench {
    extern crate test;

    use crate::{UrlLocation, UrlLocator};

    /// Builds the text shared by both benchmarks: ten URLs, each followed by
    /// 999 filler words.
    fn sample_input() -> String {
        (0..10_000)
            .map(|i| if i % 1_000 == 0 { "https://example.org" } else { " test " })
            .collect()
    }

    /// Measures a full locator pass over the sample text.
    #[bench]
    fn library(b: &mut test::Bencher) {
        let input = sample_input();

        b.iter(|| {
            let mut locator = UrlLocator::new();

            for c in input.chars() {
                if let UrlLocation::Url(len, end_offset) = locator.advance(c) {
                    test::black_box((len, end_offset));
                }
            }
        });
    }

    /// Measures plain character iteration (in reverse) over the same text as a
    /// baseline for the locator benchmark.
    #[bench]
    fn lower_bound(b: &mut test::Bencher) {
        let input = sample_input();

        b.iter(|| {
            for c in input.chars().rev() {
                test::black_box(c);
            }
        });
    }
}