ug_scraper/
search_scraper.rs1use crate::types::*;
8use crate::network::{encode_string, get_raw_html, unescape_string};
9use regex::{CaptureMatches, Regex};
10use std::str::FromStr;
11use ureq::{Error as ReqError};
12
/// Pattern applied to the raw search-page HTML to pull out one search hit per match.
///
/// Capture groups, in order:
/// 1. `id` (two or more digits; consumed as the tab id by `unwrap_results`)
/// 2. `song_id`
/// 3. `song_name`
/// 4. `artist_name`
/// 5. `type`
/// 6. `votes` (may be empty, the group is `\d*`)
/// 7. `rating`
/// 8. `tab_url`
///
/// The pattern itself contains double quotes, so it has to be written as an `r#"…"#` raw
/// string; a plain `r"…"` literal would end at the first inner quote.
const DATA_REGEX: &str = r#"(?:"id":(\d{2,}).+?song_id":(\d+).+?song_name":"(.*?)".*?artist_name":"(.*?)".*?type":"(.+?)".+?votes":(\d*).*?rating":([\d\.]+)).*?"tab_url":"(https:\/\/tabs\.ultimate-guitar\.com\/tab\/.*?\d+)"#;

/// Prefix of the title-search URL; the encoded query and a `&page=` parameter are appended.
const BASE_SEARCH_URL: &str = "https://www.ultimate-guitar.com/search.php?search_type=title&value=";
15
16pub fn get_search_results(query: &str, max_additional_pages: u8) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
40 let mut results: Vec<SearchResult> = vec![];
41 for i in 1..max_additional_pages {
42 match search_page(query, i) {
43 Ok(mut r) => {
44 if r.len() == 0 {
45 return Ok(results)
46 }
47 results.append(&mut r);
48 },
49 Err(e) => return Err(e)
50 }
51 }
52 Ok(results)
53}
54
55pub fn search_page(query: &str, page_to_search: u8) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
76 let search_url: String = BASE_SEARCH_URL.to_string() + &encode_string(query) + "&page=" + &page_to_search.to_string();
77 let raw_html: String;
78 match get_raw_html(&search_url) {
79 Ok(d) => raw_html = d,
80 Err(e) => match e {
81 ReqError::StatusCode(c) => match c {
82 404 => return Ok(vec![]),
83 _ => return Err(e.into()),
84 },
85 _ => return Err(e.into()),
86 },
87 }
88 let regex = Regex::new(DATA_REGEX).unwrap();
89 let captures = regex.captures_iter(&raw_html);
90 match unwrap_results(captures) {
91 Ok(r) => Ok(r),
92 Err(e) => Err(e.into()),
93 }
94}
95
96fn unwrap_results(matches: CaptureMatches) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
97 let mut results: Vec<SearchResult> = Vec::new();
98 for regex_match in matches {
99 let basic_data: BasicSongData = BasicSongData {
100 song_id: u32::from_str(®ex_match[2])?,
101 tab_id: u32::from_str(®ex_match[1])?,
102 title: unescape_string(®ex_match[3]).to_string(),
103 artist: unescape_string(®ex_match[4]).to_string(),
104 data_type: get_data_type(®ex_match[5]).unwrap_or(DataSetType::default()),
105 tab_link: unescape_string(®ex_match[8]).to_string()};
106 let result: SearchResult = SearchResult {
107 basic_data: basic_data,
108 rating_count: u32::from_str(®ex_match[6])?,
109 rating_value: f32::from_str(®ex_match[7])?};
110 results.push(result);
111 }
112 Ok(results)
113}
114
#[cfg(test)]
mod tests {
    use crate::search_scraper::{get_search_results, search_page};

    // NOTE(review): both tests go through `get_raw_html`, so they presumably need live
    // network access to the search site — confirm before running them in CI.

    /// Known songs must produce at least one hit; a nonsense query must produce none.
    #[test]
    fn search_results() {
        let valid_search_queries = ["zu spät die ärzte", "NEVER GONNA GIVE you up", "Don't stop me now", "Bloc party"];
        for query in valid_search_queries {
            assert!(!get_search_results(query, u8::MAX).unwrap().is_empty());
        }
        let no_result_queries = ["this should_not return any #results!"];
        for query in no_result_queries {
            assert!(get_search_results(query, 1).unwrap().is_empty());
        }
    }

    /// The first result page of a known song must contain at least one hit.
    #[test]
    fn search_single_page() {
        let valid_search_queries = ["NEVER GONNA GIVE you up"];
        for query in valid_search_queries {
            assert!(!search_page(query, 1).unwrap().is_empty());
        }
    }
}