use crate::network::{encode_string, get_raw_html, unescape_string};
use crate::types::*;
use regex::{CaptureMatches, Regex};
use std::str::FromStr;
use ureq::Error as ReqError;
/// Pattern that extracts one search hit from the raw search page.
///
/// Capture groups, in order:
/// 1. `id` (at least two digits)
/// 2. `song_id`
/// 3. `song_name`
/// 4. `artist_name`
/// 5. `type`
/// 6. `votes` (may be empty, the group is `\d*`)
/// 7. `rating`
/// 8. `tab_url` (must point at `https://tabs.ultimate-guitar.com/tab/...`, written with
///    backslash-escaped slashes)
///
/// Quotes are matched as the HTML entity `&quot;` rather than as a literal `"`. A bare `"`
/// cannot appear inside an `r"..."` literal because it would terminate the literal early.
// NOTE(review): presumably the page embeds its data HTML-escaped, which is why the entity
// form is used here — confirm against a live response of `get_raw_html`.
const DATA_REGEX: &str = r"(?:&quot;id&quot;:(\d{2,}).+?song_id&quot;:(\d+).+?song_name&quot;:&quot;(.*?)&quot;.*?artist_name&quot;:&quot;(.*?)&quot;.*?type&quot;:&quot;(.+?)&quot;.+?votes&quot;:(\d*).*?rating&quot;:([\d\.]+)).*?&quot;tab_url&quot;:&quot;(https:\/\/tabs\.ultimate-guitar\.com\/tab\/.*?\d+)&quot;";

/// Search endpoint prefix; the encoded query and a `&page=N` suffix are appended to it.
const BASE_SEARCH_URL: &str = "https://www.ultimate-guitar.com/search.php?search_type=title&value=";
/// Collects search results for `query` across consecutive result pages.
///
/// Pages are requested starting at page 1 and going up to `max_pages` (capped at 254).
/// Paging stops early as soon as a page yields no results.
///
/// # Errors
///
/// Returns the first error reported by [`search_page`]; results gathered from earlier
/// pages are discarded in that case.
pub fn get_search_results(
    query: &str,
    max_pages: u8,
) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
    // `u8::MAX` is treated as 254, every other value is used as given.
    let last_page = max_pages.min(254);
    let mut collected: Vec<SearchResult> = Vec::new();

    for page in 1..=last_page {
        let mut page_hits = search_page(query, page)?;
        if page_hits.is_empty() {
            // An empty page means there is nothing further to fetch.
            break;
        }
        collected.append(&mut page_hits);
    }

    Ok(collected)
}
pub fn search_page(
query: &str,
page_to_search: u8,
) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
let search_url: String = BASE_SEARCH_URL.to_string()
+ &encode_string(query)
+ "&page="
+ &page_to_search.to_string();
let raw_html: String;
match get_raw_html(&search_url) {
Ok(d) => raw_html = d,
Err(e) => match e {
ReqError::StatusCode(c) => match c {
404 => return Ok(vec![]),
_ => return Err(e.into()),
},
_ => return Err(e.into()),
},
}
let regex = Regex::new(DATA_REGEX).unwrap();
let captures = regex.captures_iter(&raw_html);
match unwrap_results(captures) {
Ok(r) => Ok(r),
Err(e) => Err(e),
}
}
/// Converts regex matches into [`SearchResult`] values.
///
/// Each match is read by capture-group index:
/// group 1 → `tab_id`, 2 → `song_id`, 3 → `title`, 4 → `artist`, 5 → `data_type`,
/// 6 → `rating_count`, 7 → `rating_value`, 8 → `tab_link`.
///
/// # Errors
///
/// Returns the first integer or float parse error encountered; no partial result list
/// is returned in that case.
///
/// # Panics
///
/// Indexing a capture group that did not participate in the match panics.
fn unwrap_results(
    matches: CaptureMatches,
) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
    let mut results: Vec<SearchResult> = Vec::new();
    for regex_match in matches {
        let basic_data: BasicSongData = BasicSongData {
            song_id: u32::from_str(&regex_match[2])?,
            tab_id: u32::from_str(&regex_match[1])?,
            title: unescape_string(&regex_match[3]).to_string(),
            artist: unescape_string(&regex_match[4]).to_string(),
            // Falls back to the type's default when `get_data_type` yields no value.
            data_type: get_data_type(&regex_match[5]).unwrap_or_default(),
            tab_link: unescape_string(&regex_match[8]).to_string(),
        };
        let result: SearchResult = SearchResult {
            basic_data,
            rating_count: u32::from_str(&regex_match[6])?,
            rating_value: f32::from_str(&regex_match[7])?,
        };
        results.push(result);
    }
    Ok(results)
}
// NOTE(review): these tests call the real `get_raw_html` with no mocking, so they
// presumably need network access to pass — confirm before running them in CI.
#[cfg(test)]
mod tests {
    use crate::{
        network::get_raw_html,
        search_scraper::{get_search_results, search_page},
    };

    /// Queries expected to have hits must return a non-empty list whose first tab link
    /// can be fetched; the nonsense query must return an empty list.
    #[test]
    fn search_results() {
        let queries_with_hits = [
            "die ärzte",
            "NEVER GONNA GIVE you up",
            "Don't stop me now",
            "Bloc party",
            "366 refused",
        ];
        for term in queries_with_hits {
            let hits = get_search_results(term, 1).unwrap();
            assert!(!hits.is_empty());
            let first_link = &hits[0].basic_data.tab_link;
            assert!(get_raw_html(first_link).is_ok());
        }

        let queries_without_hits = ["this should_not return any #results!"];
        for term in queries_without_hits {
            let hits = get_search_results(term, 1).unwrap();
            assert!(hits.is_empty());
        }
    }

    /// Fetching page 1 directly for a known query must yield at least one hit.
    #[test]
    fn search_single_page() {
        let queries_with_hits = ["NEVER GONNA GIVE you up"];
        for term in queries_with_hits {
            // The range `1..2` covers page 1 only.
            for page in 1..2 {
                let hits = search_page(term, page).unwrap();
                assert!(!hits.is_empty());
            }
        }
    }
}