1use crate::types::*;
8use crate::network::*;
9use crate::error::UGError;
10use regex::Regex;
11use std::str::FromStr;
12
// NOTE(review): every double quote inside these markers is spelled as the
// `&quot;` entity. Bare `"` characters would terminate the literals early and
// the file would not compile. This presumes the page embeds its JSON
// HTML-escaped — confirm against the output of `get_raw_html`.

/// Marker that ends the tab content; `get_tab_lines` keeps the text before its first occurrence.
const END_OF_CHORDS_DELIM: &str = "&quot;,&quot;revision_id&quot;:";
/// Marker that starts the tab content; `get_tab_lines` keeps the text after it.
const START_OF_CHORDS_DELIM: &str = "&quot;:{&quot;wiki_tab&quot;:{&quot;content&quot;:&quot;";
/// Substrings that make `validate_html` reject a page outright.
const HTML_BLACKLIST: [&str; 1] = ["&quot;type&quot;:&quot;Video&quot;"];
/// Pattern a tab link has to match in `validate_link`.
///
/// NOTE(review): `[www.]*` and `[tabs.]*` are character classes rather than
/// optional groups, and the `.` in `ultimate-guitar.com` is unescaped, so the
/// pattern accepts more than the canonical URL shapes. Left unchanged so that
/// links accepted today keep being accepted.
const VALID_LINK_REGEX: &str = r"http[s]*:\/\/[www.]*[tabs.]*ultimate-guitar.com\/tab\/[\S]+";
/// Pattern used by `extract_metadata`.
///
/// Capture groups: 1 = capo, 2 = tonality, 3 = tuning name, 4 = tuning value.
/// Every group may match the empty string.
const METADATA_REGEX: &str = r"&quot;adsupp_binary_blocked&quot;:null,&quot;meta&quot;:\{[&quot;capo&quot;:]*(\d*)[,]*&quot;[tonality&quot;:&quot;]*(\w*)[&quot;,&quot;]*tuning&quot;:\{&quot;name&quot;:&quot;([^:]*)&quot;,&quot;value&quot;:&quot;([^:]*)&quot;,";
/// Pattern used by `get_basic_metadata`.
///
/// Capture groups: 1 = tab id, 2 = song id, 3 = song name, 4 = artist name,
/// 5 = tab type.
const BASIC_DATA_REGEX: &str = r"tab&quot;:\{&quot;id&quot;:(\d+),&quot;song_id&quot;:(\d+),&quot;song_name&quot;:&quot;([^:]+)&quot;,&quot;artist_id&quot;:\d+,&quot;artist_name&quot;:&quot;([^:]+)&quot;,&quot;type&quot;:&quot;([\w\s]+)&quot;,&quot;part&quot;:";
19
20pub fn get_song_data(url: &str, replace_german_names: bool) -> Result<Song, Box<dyn std::error::Error>> {
43 let raw_html: String;
44 match get_raw_html(url) {
45 Ok(s) => raw_html = s,
46 Err(e) => return Err(e.into()),
47 }
48 let song_lines: Vec<Line> = get_tab_lines(&raw_html, replace_german_names)?;
49 let song_metadata: Option<SongMetaData>;
50 let basic_song_data: BasicSongData;
51 match get_basic_metadata(&raw_html, url) {
52 Ok(d) => {
53 song_metadata = extract_metadata(&raw_html);
54 basic_song_data = d;
55 }
56 Err(e) => return Err(e.into())
57 }
58 let song: Song = Song { lines: song_lines, metadata: song_metadata, basic_data: basic_song_data };
59 Ok(song)
60}
61
62pub fn get_basic_metadata(raw_html: &str, tab_link: &str) -> Result<BasicSongData, UGError> {
92 validate_html(raw_html)?;
93 validate_link(tab_link)?;
94
95 let regex = Regex::new(BASIC_DATA_REGEX).unwrap();
96 let captures = regex.captures(raw_html);
97 if captures.is_some() {
98 let captures = captures.unwrap();
99 let song_type: DataSetType = get_data_type(&captures[5]).unwrap_or(DataSetType::default());
100 let tab_id: u32;
101 match u32::from_str(&captures[1]) {
102 Ok(i) => tab_id = i,
103 Err(_e) => return Err(UGError::UnexpectedWebResultError),
104 }
105 let song_id: u32;
106 match u32::from_str(&captures[2]) {
107 Ok(i) => song_id = i,
108 Err(_e) => return Err(UGError::UnexpectedWebResultError),
109 }
110 let title = unescape_string(&captures[3]).to_string();
111 let artist = unescape_string(&captures[4]).to_string();
112 let song_basic_meta: BasicSongData = BasicSongData { title: title,
113 artist: artist,
114 tab_link: tab_link.to_string(),
115 song_id: song_id,
116 tab_id: tab_id,
117 data_type: song_type };
118 return Ok(song_basic_meta)
119 } else {
120 return Err(UGError::NoBasicDataMatchError)
121 }
122}
123
124pub fn get_tab_lines(raw_html: &str, replace_german_names: bool) -> Result<Vec<Line>, UGError> {
147 validate_html(raw_html)?;
148 let string_parts: Vec<&str> = raw_html.split(END_OF_CHORDS_DELIM).collect();
149 let raw_data: &str = string_parts[0].split(START_OF_CHORDS_DELIM).collect::<Vec<&str>>()[1];
150 let formatted_string_lines = unescape_string(raw_data);
151 let lines: Vec<Line> = clean_and_evaluate(formatted_string_lines.lines(), replace_german_names);
152 Ok(lines)
153}
154
155pub fn validate_link(url: &str) -> Result<(), UGError> {
159 let regex = Regex::new(VALID_LINK_REGEX).unwrap();
160 let captures = regex.captures(url);
161 match captures {
162 Some(_d) => Ok(()),
163 None => Err(UGError::InvalidURLError),
164 }
165}
166
167fn validate_html(raw_html: &str) -> Result<(), UGError> {
168 for item in HTML_BLACKLIST {
169 if raw_html.contains(item) {
170 return Err(UGError::InvalidHTMLError)
171 }
172 }
173 if !raw_html.contains(START_OF_CHORDS_DELIM) || !raw_html.contains(END_OF_CHORDS_DELIM) {
174 return Err(UGError::InvalidHTMLError)
175 }
176 Ok(())
177}
178
179fn extract_metadata(raw_html: &str) -> Option<SongMetaData> {
180 let regex = Regex::new(METADATA_REGEX).unwrap();
181 let captures = regex.captures(raw_html);
182 let mut song_metadata: SongMetaData = SongMetaData::default();
183 if captures.is_some() {
184 let captures = captures.unwrap();
185 let mut capture_options: [Option<String>; 4] = [Some(captures[1].to_string()),
186 Some(captures[2].to_string()),
187 Some(captures[3].to_string()),
188 Some(captures[4].to_string())];
189 for i in 0..4 {
190 if capture_options[i].clone().unwrap().is_empty() {
191 capture_options[i] = None;
192 }
193 match i {
194 0 => song_metadata.capo = capture_options[i].clone(),
195 1 => song_metadata.tonality = capture_options[i].clone(),
196 2 => song_metadata.tuning_name = capture_options[i].clone(),
197 3 => song_metadata.tuning = capture_options[i].clone(),
198 _ => (),
199 }
200 }
201 } else {
202 return None
203 }
204 return Some(song_metadata)
205}
206
207fn clean_and_evaluate(lines: std::str::Lines<'_>, replace_german_names: bool) -> Vec<Line> {
208 let mut clean_lines: Vec<Line> = Vec::new();
209 for line in lines {
210 let mut line_type: DataType = DataType::Lyric;
211 if line.contains("[ch]") {
212 line_type = DataType::Chord;
213 }
214 let mut clean_line: String = String::from(line);
215 for key in ["[ch]", "[/ch]", "[tab]", "[/tab]"] {
216 clean_line = clean_line.replace(key, "")
217 }
218 if clean_line.contains("[") && clean_line.contains("]") {
219 line_type = DataType::SectionTitle;
220 }
221 let mut line = Line {line_type: line_type, text_data: clean_line};
222 if replace_german_names {
223 line = line.replace_german_names();
224 }
225 clean_lines.push(line);
226 }
227 clean_lines
228}
229
/// Tests for the page parsing functions of this module.
///
/// NOTE(review): most tests pass real ultimate-guitar URLs to `get_raw_html`,
/// so they presumably need network access and depend on the live page
/// contents — confirm before running them in an offline CI job.
#[cfg(test)]
mod tests {
    use super::*;

    /// Tab lines can be extracted from known-good pages without an HTML error.
    #[test]
    fn get_lines_of_tab() {
        let tabs_to_get = [
            "https://tabs.ultimate-guitar.com/tab/367279",
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
        ];
        for tab in tabs_to_get {
            println!("Getting tab: {}", tab);
            assert!(!matches!(
                get_tab_lines(&get_raw_html(tab).unwrap(), true),
                Err(UGError::InvalidHTMLError)
            ));
        }
    }

    /// A full https link is accepted, a link without scheme is rejected.
    #[test]
    fn tab_link_validation() {
        assert_eq!(
            validate_link("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            Ok(())
        );
        assert_ne!(
            validate_link("tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658"),
            Ok(())
        );
    }

    /// The data set type reported for each page matches the expected one.
    #[test]
    fn type_detection() {
        let type_detection_checks: Vec<(DataSetType, &str)> = vec![
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/bloc-party/this-modern-love-bass-180218"),
            (DataSetType::Tab, "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488"),
            (DataSetType::Ukulele, "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (DataSetType::Drums, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
        ];
        for (expected_type, url) in type_detection_checks {
            println!("Testing valid url: {}", url);
            let basic_data = get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap();
            assert_eq!(basic_data.data_type, expected_type);
        }
    }

    /// Supported pages pass `validate_html`, unsupported ones are rejected.
    #[test]
    fn validate_page_contents() {
        let valid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
            "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
            "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
            "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
            "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
            "https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
            "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
            "https://tabs.ultimate-guitar.com/tab/367279",
        ];
        for valid_page_url in valid_page_urls {
            println!("Testing valid url: {}", valid_page_url);
            assert!(!matches!(
                validate_html(&get_raw_html(valid_page_url).unwrap()),
                Err(UGError::InvalidHTMLError)
            ));
        }

        let invalid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/refused/i-wanna-watch-the-world-burn-guitar-pro-5868920",
            "https://tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658",
            "https://tabs.ultimate-guitar.com/tab/the-beatles/let-it-be-video-781202",
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=RDdQw4w9WgXcQ",
        ];
        for invalid_page_url in invalid_page_urls {
            println!("Testing invalid url: {}", invalid_page_url);
            assert!(matches!(
                validate_html(&get_raw_html(invalid_page_url).unwrap()),
                Err(UGError::InvalidHTMLError)
            ));
        }
    }

    /// Title, artist, song id and tab id are read correctly from each page.
    #[test]
    fn get_basic_data() {
        // (url, title, artist, song_id, tab_id)
        let test_sets: Vec<(&str, &str, &str, u32, u32)> = vec![
            ("https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
             "Dont Stop Me Now", "Queen", 15591, 519549),
            ("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
             "Never Gonna Give You Up", "Rick Astley", 196324, 521741),
            ("https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
             "Stairway To Heaven", "Led Zeppelin", 31683, 9488),
            ("https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
             "Wenn Es Gut Ist", "Olli Schulz", 317511, 1381967),
            ("https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
             "In The Air Tonight", "Phil Collins", 138587, 880599),
            ("https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
             "Feeling This", "Blink-182", 54209, 104175),
            ("https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
             "Empty Spaces", "Pink Floyd", 17357, 147995),
            ("https://tabs.ultimate-guitar.com/tab/367279",
             "Zu Spät", "Die Ärzte", 1577513, 367279),
        ];

        for (url, title, artist, song_id, tab_id) in test_sets {
            let result = get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap();
            assert_eq!(result.title, title);
            assert_eq!(result.artist, artist);
            assert_eq!(result.song_id, song_id);
            assert_eq!(result.tab_id, tab_id);
        }
    }

    /// The optional metadata of each page matches the expected value exactly,
    /// including the pages for which no metadata is expected.
    #[test]
    fn get_metadata() {
        let url_metadata_sets: Vec<(Option<SongMetaData>, &str)> = vec![
            (Some(SongMetaData {
                capo: Some(String::from("3")),
                tonality: None,
                tuning_name: Some(String::from("G C E A")),
                tuning: Some(String::from("G C E A")),
            }), "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (None, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
            (None, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (Some(SongMetaData {
                capo: Some(String::from("1")),
                tonality: None,
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (Some(SongMetaData {
                capo: None,
                tonality: Some(String::from("F")),
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
        ];
        for (expected, url) in url_metadata_sets {
            println!("Testing url: {}", url);
            // Comparing the Options directly reports both values on any
            // mismatch, whether metadata is unexpectedly present or missing.
            assert_eq!(extract_metadata(&get_raw_html(url).unwrap()), expected);
        }
    }
}