ug_scraper/
search_scraper.rs

1// UG-Scraper - A basic rust API for getting data from Ultimate Guitar
2// Copyright (C) 2025  Linus Tibert
3//
4// This program was originally published under the MIT licence as seen
5// here: https://github.com/Lich-Corals/ug-tab-scraper-rs/blob/mistress/LICENCE
6
7use crate::network::{encode_string, get_raw_html, unescape_string};
8use crate::types::*;
9use regex::{CaptureMatches, Regex};
10use std::str::FromStr;
11use ureq::Error as ReqError;
12
/// Pattern that extracts one search hit from the search page's `&quot;`-escaped data.
///
/// The quotes are written as the literal entity `&quot;`: a bare `"` cannot
/// appear inside an `r"…"` raw string, because it would terminate the literal.
///
/// Capture groups, as consumed by `unwrap_results`:
/// 1. tab id
/// 2. song id
/// 3. song name
/// 4. artist name
/// 5. type
/// 6. votes (may be empty, the group is `\d*`)
/// 7. rating
/// 8. tab URL
const DATA_REGEX: &str = r"(?:&quot;id&quot;:(\d{2,}).+?song_id&quot;:(\d+).+?song_name&quot;:&quot;(.*?)&quot;.*?artist_name&quot;:&quot;(.*?)&quot;.*?type&quot;:&quot;(.+?)&quot;.+?votes&quot;:(\d*).*?rating&quot;:([\d\.]+)).*?&quot;tab_url&quot;:&quot;(https:\/\/tabs\.ultimate-guitar\.com\/tab\/.*?\d+)&quot";
/// Search endpoint; the URL-encoded query is appended directly after `value=`.
const BASE_SEARCH_URL: &str = "https://www.ultimate-guitar.com/search.php?search_type=title&value=";
15
16/// Get search results for a query
17///
18/// ## Arguments:
19/// * `query`: The query to search for; can be a song title or an author.
20/// * `max_pages`: The amount of result pages to search
21///     * The function will automatically stop searching if a page is empty. If you want all results, you may use `u8::MAX` without having any performance problems.
22///
23/// ## Example:
24/// ```
25/// use ug_scraper::search_scraper::get_search_results;
26///
27/// // Only gets results of the first page
28/// get_search_results("Never gonna give you up", 1);
29///
30/// // Gets results of the first two pages
31/// get_search_results("Never gonna give you up", 2);
32/// ```
33///
34/// Returns an empty vector if there are no results.
35///
36/// ## Possible errors
37/// * `ureq::Error::*`
38/// * [`crate::error::UGError::UnexpectedWebResultError`]
39pub fn get_search_results(
40        query: &str,
41        max_pages: u8,
42) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
43        let mut results: Vec<SearchResult> = vec![];
44        let max_pages_local = if max_pages == u8::MAX { 254 } else { max_pages };
45        for i in 1..max_pages_local + 1 {
46                match search_page(query, i) {
47                        Ok(mut r) => {
48                                if r.is_empty() {
49                                        return Ok(results);
50                                }
51                                results.append(&mut r);
52                        }
53                        Err(e) => return Err(e),
54                }
55        }
56        Ok(results)
57}
58
59/// Get search results of a single page
60///
61/// ## Arguments:
62/// * `query`: The query to search for; can be a song title or an author.
63/// * `page_to_search`: The page of results to search
64///
65/// ## Example:
66/// ```
67/// use ug_scraper::search_scraper::search_page;
68///
69/// // Returns results from results page 1
70/// search_page("We are number one", 1);
71///
72/// // Returns an empty Vec because results page 0 is searched
73/// search_page("empty spaces", 0);
74/// ```
75///
76/// ## Possible errors
77/// * `ureq::Error::*`
78/// * [`crate::error::UGError::UnexpectedWebResultError`]
79pub fn search_page(
80        query: &str,
81        page_to_search: u8,
82) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
83        let search_url: String = BASE_SEARCH_URL.to_string()
84                + &encode_string(query)
85                + "&page="
86                + &page_to_search.to_string();
87        let raw_html: String;
88        match get_raw_html(&search_url) {
89                Ok(d) => raw_html = d,
90                Err(e) => match e {
91                        ReqError::StatusCode(c) => match c {
92                                404 => return Ok(vec![]),
93                                _ => return Err(e.into()),
94                        },
95                        _ => return Err(e.into()),
96                },
97        }
98        let regex = Regex::new(DATA_REGEX).unwrap();
99        let captures = regex.captures_iter(&raw_html);
100        match unwrap_results(captures) {
101                Ok(r) => Ok(r),
102                Err(e) => Err(e),
103        }
104}
105
106fn unwrap_results(
107        matches: CaptureMatches,
108) -> Result<Vec<SearchResult>, Box<dyn std::error::Error>> {
109        let mut results: Vec<SearchResult> = Vec::new();
110        for regex_match in matches {
111                let basic_data: BasicSongData = BasicSongData {
112                        song_id: u32::from_str(&regex_match[2])?,
113                        tab_id: u32::from_str(&regex_match[1])?,
114                        title: unescape_string(&regex_match[3]).to_string(),
115                        artist: unescape_string(&regex_match[4]).to_string(),
116                        data_type: get_data_type(&regex_match[5]).unwrap_or_default(),
117                        tab_link: unescape_string(&regex_match[8]).to_string(),
118                };
119                let result: SearchResult = SearchResult {
120                        basic_data,
121                        rating_count: u32::from_str(&regex_match[6])?,
122                        rating_value: f32::from_str(&regex_match[7])?,
123                };
124                results.push(result);
125        }
126        Ok(results)
127}
128
#[cfg(test)]
mod tests {
        use crate::network::get_raw_html;
        use crate::search_scraper::{get_search_results, search_page};

        // NOTE: both tests send real requests, so they need network access.

        /// Known queries must yield results whose first tab link can be fetched;
        /// a nonsense query must yield nothing.
        #[test]
        fn search_results() {
                let queries_with_hits: [&str; 5] = [
                        "zu spät die ärzte",
                        "NEVER GONNA GIVE you up",
                        "Don't stop me now",
                        "Bloc party",
                        "REV001 Refused",
                ];
                for query in queries_with_hits.iter() {
                        let hits = get_search_results(query, 1).unwrap();
                        assert!(!hits.is_empty());
                        let first_link = &hits[0].basic_data.tab_link;
                        assert!(get_raw_html(first_link).is_ok());
                }

                let queries_without_hits: [&str; 1] = ["this should_not return any #results!"];
                for query in queries_without_hits.iter() {
                        let hits = get_search_results(query, 1).unwrap();
                        assert!(hits.is_empty());
                }
        }

        /// The first result page of a known query must not be empty.
        #[test]
        fn search_single_page() {
                let queries_with_hits: [&str; 1] = ["NEVER GONNA GIVE you up"];
                for query in queries_with_hits.iter() {
                        // `1..2` covers page 1 only.
                        for page in 1..2 {
                                let hits = search_page(query, page).unwrap();
                                assert!(!hits.is_empty());
                        }
                }
        }
}