ug_scraper/
search_scraper.rs1use crate::network::{encode_string, get_raw_html, unescape_string};
8use crate::types::*;
9use regex::{CaptureMatches, Regex};
10use std::str::FromStr;
11use ureq::Error as ReqError;
12
// Pattern applied to the raw HTML of a search page; each match describes one search hit.
// Capture groups, in order of appearance:
//   1 = "id", 2 = "song_id", 3 = "song_name", 4 = "artist_name",
//   5 = "type", 6 = "votes" (`\d*`, so the capture may be empty), 7 = "rating", 8 = "tab_url".
// NOTE(review): as shown here the pattern has bare `"` characters inside an `r"…"` literal,
// which would terminate the literal early. Presumably `&quot;` entities or `r#"…"#` delimiters
// were lost when this file was extracted — confirm against the upstream file before editing.
13const DATA_REGEX: &str = r"(?:"id":(\d{2,}).+?song_id":(\d+).+?song_name":"(.*?)".*?artist_name":"(.*?)".*?type":"(.+?)".+?votes":(\d*).*?rating":([\d\.]+)).*?"tab_url":"(https:\/\/tabs\.ultimate-guitar\.com\/tab\/.*?\d+)"";
// Search endpoint prefix; `search_page` appends the encoded query directly after `value=`.
14const BASE_SEARCH_URL: &str = "https://www.ultimate-guitar.com/search.php?search_type=title&value=";
15
16pub fn get_search_results(
40 query: &str,
41 max_pages: u8,
42) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
43 let mut results: Vec<SearchResult> = vec![];
44 let max_pages_local = if max_pages == u8::MAX { 254 } else { max_pages };
45 for i in 1..max_pages_local + 1 {
46 match search_page(query, i) {
47 Ok(mut r) => {
48 if r.is_empty() {
49 return Ok(results);
50 }
51 results.append(&mut r);
52 }
53 Err(e) => return Err(e),
54 }
55 }
56 Ok(results)
57}
58
59pub fn search_page(
80 query: &str,
81 page_to_search: u8,
82) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
83 let search_url: String = BASE_SEARCH_URL.to_string()
84 + &encode_string(query)
85 + "&page="
86 + &page_to_search.to_string();
87 let raw_html: String;
88 match get_raw_html(&search_url) {
89 Ok(d) => raw_html = d,
90 Err(e) => match e {
91 ReqError::StatusCode(c) => match c {
92 404 => return Ok(vec![]),
93 _ => return Err(e.into()),
94 },
95 _ => return Err(e.into()),
96 },
97 }
98 let regex = Regex::new(DATA_REGEX).unwrap();
99 let captures = regex.captures_iter(&raw_html);
100 match unwrap_results(captures) {
101 Ok(r) => Ok(r),
102 Err(e) => Err(e),
103 }
104}
105
106fn unwrap_results(
107 matches: CaptureMatches,
108) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
109 let mut results: Vec<SearchResult> = Vec::new();
110 for regex_match in matches {
111 let basic_data: BasicSongData = BasicSongData {
112 song_id: u32::from_str(®ex_match[2])?,
113 tab_id: u32::from_str(®ex_match[1])?,
114 title: unescape_string(®ex_match[3]).to_string(),
115 artist: unescape_string(®ex_match[4]).to_string(),
116 data_type: get_data_type(®ex_match[5]).unwrap_or_default(),
117 tab_link: unescape_string(®ex_match[8]).to_string(),
118 };
119 let result: SearchResult = SearchResult {
120 basic_data,
121 rating_count: u32::from_str(®ex_match[6])?,
122 rating_value: f32::from_str(®ex_match[7])?,
123 };
124 results.push(result);
125 }
126 Ok(results)
127}
128
#[cfg(test)]
mod tests {
    use crate::{
        network::get_raw_html,
        search_scraper::{get_search_results, search_page},
    };

    /// Queries expected to have hits must return results whose first tab link
    /// can be fetched; a nonsense query must return nothing.
    #[test]
    fn search_results() {
        let queries_with_hits = [
            "zu spät die ärzte",
            "NEVER GONNA GIVE you up",
            "Don't stop me now",
            "Bloc party",
            "REV001 Refused",
        ];
        for query in queries_with_hits {
            let hits = get_search_results(query, 1).unwrap();
            assert!(!hits.is_empty());
            let first_link = &hits[0].basic_data.tab_link;
            assert!(get_raw_html(first_link).is_ok());
        }

        let queries_without_hits = ["this should_not return any #results!"];
        for query in queries_without_hits {
            let hits = get_search_results(query, 1).unwrap();
            assert!(hits.is_empty());
        }
    }

    /// Requesting the first page directly must return at least one result.
    #[test]
    fn search_single_page() {
        let queries_with_hits = ["NEVER GONNA GIVE you up"];
        for query in queries_with_hits {
            for page in 1u8..2 {
                let hits = search_page(query, page).unwrap();
                assert!(!hits.is_empty());
            }
        }
    }
}