1use crate::types::*;
8use crate::network::*;
9use crate::error::UGError;
10use regex::Regex;
11use std::str::FromStr;
12
// NOTE(review): the markers and patterns below contain plain `"` characters and are
// therefore written as raw strings (`r#"..."#`) so that they parse. If the fetched page
// stores this JSON HTML-escaped (e.g. as `&quot;`), the literals must use that escaped
// form to match -- confirm against real page source.

/// Text that directly follows the tab content; everything before its first occurrence is kept.
const END_OF_CHORDS_DELIM: &str = r#"","revision_id":"#;
/// Text that directly precedes the tab content.
const START_OF_CHORDS_DELIM: &str = r#"":{"wiki_tab":{"content":""#;
/// Substrings whose presence makes a page invalid for parsing.
const HTML_BLACKLIST: [&str; 1] = [r#""type":"Video""#];
/// Pattern a tab link has to contain to be accepted by `validate_link`.
const VALID_LINK_REGEX: &str = r"http[s]*:\/\/[www.]*[tabs.]*ultimate-guitar.com\/tab\/[\S]+";
/// Pattern for the optional song metadata.
/// Capture groups: 1 = capo, 2 = tonality, 3 = tuning name, 4 = tuning value.
const METADATA_REGEX: &str = r#""adsupp_binary_blocked":null,"meta":\{["capo":]*(\d*)[,]*"[tonality":"]*(\w*)[","]*tuning":\{"name":"([^:]*)","value":"([^:]*)","#;
/// Pattern for the basic song data.
/// Capture groups: 1 = tab id, 2 = song id, 3 = song name, 4 = artist name, 5 = type.
const BASIC_DATA_REGEX: &str = r#"tab":\{"id":(\d+),"song_id":(\d+),"song_name":"([^:]+)","artist_id":\d+,"artist_name":"([^:]+)","type":"([\w\s]+)","part":"#;
19
20pub fn get_song_data(url: &str, replace_german_names: bool) -> Result<Song, Box<dyn std::error::Error>> {
43 let raw_html: String;
44 match get_raw_html(url) {
45 Ok(s) => raw_html = s,
46 Err(e) => return Err(e.into()),
47 }
48 let song_lines: Vec<Line> = get_tab_lines(&raw_html, replace_german_names)?;
49 let song_metadata: Option<SongMetaData>;
50 let basic_song_data: BasicSongData;
51 match get_basic_metadata(&raw_html, url) {
52 Ok(d) => {
53 song_metadata = extract_metadata(&raw_html);
54 basic_song_data = d;
55 }
56 Err(e) => return Err(e.into())
57 }
58 let song: Song = Song { lines: song_lines, metadata: song_metadata, basic_data: basic_song_data };
59 Ok(song)
60}
61
62pub fn get_basic_metadata(raw_html: &str, tab_link: &str) -> Result<BasicSongData, UGError> {
92 validate_html(raw_html)?;
93 validate_link(tab_link)?;
94
95 let regex = Regex::new(BASIC_DATA_REGEX).unwrap();
96 let captures = regex.captures(raw_html);
97 if captures.is_some() {
98 let captures = captures.unwrap();
99 let song_type: DataSetType = get_data_type(&captures[5]).unwrap_or(DataSetType::default());
100 let tab_id: u32;
101 match u32::from_str(&captures[1]) {
102 Ok(i) => tab_id = i,
103 Err(_e) => return Err(UGError::UnexpectedWebResultError),
104 }
105 let song_id: u32;
106 match u32::from_str(&captures[2]) {
107 Ok(i) => song_id = i,
108 Err(_e) => return Err(UGError::UnexpectedWebResultError),
109 }
110 let title = unescape_string(&captures[3]).to_string();
111 let artist = unescape_string(&captures[4]).to_string();
112 let song_basic_meta: BasicSongData = BasicSongData { title: title,
113 artist: artist,
114 tab_link: tab_link.to_string(),
115 song_id: song_id,
116 tab_id: tab_id,
117 data_type: song_type };
118 return Ok(song_basic_meta)
119 } else {
120 return Err(UGError::NoBasicDataMatchError)
121 }
122}
123
124pub fn get_tab_lines(raw_html: &str, replace_german_names: bool) -> Result<Vec<Line>, UGError> {
147 validate_html(raw_html)?;
148 let string_parts: Vec<&str> = raw_html.split(END_OF_CHORDS_DELIM).collect();
149 let raw_data: &str = string_parts[0].split(START_OF_CHORDS_DELIM).collect::<Vec<&str>>()[1];
150 let formatted_string_lines = unescape_string(raw_data);
151 let lines: Vec<Line> = clean_and_evaluate(formatted_string_lines.lines(), replace_german_names);
152 Ok(lines)
153}
154
155fn validate_html(raw_html: &str) -> Result<(), UGError> {
156 for item in HTML_BLACKLIST {
157 if raw_html.contains(item) {
158 return Err(UGError::InvalidHTMLError)
159 }
160 }
161 if !raw_html.contains(START_OF_CHORDS_DELIM) || !raw_html.contains(END_OF_CHORDS_DELIM) {
162 return Err(UGError::InvalidHTMLError)
163 }
164 Ok(())
165}
166
167fn validate_link(url: &str) -> Result<(), UGError> {
168 let regex = Regex::new(VALID_LINK_REGEX).unwrap();
169 let captures = regex.captures(url);
170 match captures {
171 Some(_d) => Ok(()),
172 None => Err(UGError::InvalidURLError),
173 }
174
175}
176
177fn extract_metadata(raw_html: &str) -> Option<SongMetaData> {
178 let regex = Regex::new(METADATA_REGEX).unwrap();
179 let captures = regex.captures(raw_html);
180 let mut song_metadata: SongMetaData = SongMetaData::default();
181 if captures.is_some() {
182 let captures = captures.unwrap();
183 let mut capture_options: [Option<String>; 4] = [Some(captures[1].to_string()),
184 Some(captures[2].to_string()),
185 Some(captures[3].to_string()),
186 Some(captures[4].to_string())];
187 for i in 0..4 {
188 if capture_options[i].clone().unwrap().is_empty() {
189 capture_options[i] = None;
190 }
191 match i {
192 0 => song_metadata.capo = capture_options[i].clone(),
193 1 => song_metadata.tonality = capture_options[i].clone(),
194 2 => song_metadata.tuning_name = capture_options[i].clone(),
195 3 => song_metadata.tuning = capture_options[i].clone(),
196 _ => (),
197 }
198 }
199 } else {
200 return None
201 }
202 return Some(song_metadata)
203}
204
205fn clean_and_evaluate(lines: std::str::Lines<'_>, replace_german_names: bool) -> Vec<Line> {
206 let mut clean_lines: Vec<Line> = Vec::new();
207 for line in lines {
208 let mut line_type: DataType = DataType::Lyric;
209 if line.contains("[ch]") {
210 line_type = DataType::Chord;
211 }
212 let mut clean_line: String = String::from(line);
213 for key in ["[ch]", "[/ch]", "[tab]", "[/tab]"] {
214 clean_line = clean_line.replace(key, "")
215 }
216 if clean_line.contains("[") && clean_line.contains("]") {
217 line_type = DataType::SectionTitle;
218 }
219 let mut line = Line {line_type: line_type, text_data: clean_line};
220 if replace_german_names {
221 line = line.replace_german_names();
222 }
223 clean_lines.push(line);
224 }
225 clean_lines
226}
227
// These tests call `get_raw_html` with real page URLs and compare against fixed
// expected values, so their outcome depends on what `get_raw_html` returns for them.
#[cfg(test)]
mod tests {
    use super::*;

    /// Known tab pages must not be rejected as invalid HTML when extracting lines.
    #[test]
    fn get_lines_of_tab() {
        let tabs_to_get = [
            "https://tabs.ultimate-guitar.com/tab/367279",
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
        ];
        for tab in tabs_to_get {
            println!("Getting tab: {}", tab);
            assert!(!matches!(
                get_tab_lines(&get_raw_html(tab).unwrap(), true),
                Err(UGError::InvalidHTMLError)
            ));
        }
    }

    /// A full https link is accepted, a link without scheme is rejected.
    #[test]
    fn tab_link_validation() {
        assert_eq!(
            validate_link("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            Ok(())
        );
        assert_ne!(
            validate_link("tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658"),
            Ok(())
        );
    }

    /// The data set type is derived correctly for each kind of tab page.
    #[test]
    fn type_detection() {
        let type_detection_checks: Vec<(DataSetType, &str)> = vec![
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
            (DataSetType::Chords, "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/bloc-party/this-modern-love-bass-180218"),
            (DataSetType::Tab, "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488"),
            (DataSetType::Ukulele, "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (DataSetType::Drums, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (DataSetType::Bass, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
        ];
        for (expected_type, url) in type_detection_checks {
            println!("Testing valid url: {}", url);
            assert_eq!(
                get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap().data_type,
                expected_type
            );
        }
    }

    /// Supported pages pass HTML validation, unsupported ones are rejected.
    #[test]
    fn validate_page_contents() {
        let valid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
            "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
            "https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
            "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
            "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
            "https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
            "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
            "https://tabs.ultimate-guitar.com/tab/367279",
        ];
        for valid_page_url in valid_page_urls {
            println!("Testing valid url: {}", valid_page_url);
            assert!(!matches!(
                validate_html(&get_raw_html(valid_page_url).unwrap()),
                Err(UGError::InvalidHTMLError)
            ));
        }

        let invalid_page_urls = vec![
            "https://tabs.ultimate-guitar.com/tab/refused/i-wanna-watch-the-world-burn-guitar-pro-5868920",
            "https://tabs.ultimate-guitar.com/tab/refused/rather-be-dead-power-595658",
            "https://tabs.ultimate-guitar.com/tab/the-beatles/let-it-be-video-781202",
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=RDdQw4w9WgXcQ",
        ];
        for invalid_page_url in invalid_page_urls {
            println!("Testing invalid url: {}", invalid_page_url);
            assert!(matches!(
                validate_html(&get_raw_html(invalid_page_url).unwrap()),
                Err(UGError::InvalidHTMLError)
            ));
        }
    }

    /// Title, artist, song id and tab id are extracted as expected.
    #[test]
    fn get_basic_data() {
        // (url, title, artist, song id, tab id)
        let test_sets: Vec<(&str, &str, &str, u32, u32)> = vec![
            ("https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549",
             "Dont Stop Me Now", "Queen", 15591, 519549),
            ("https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741",
             "Never Gonna Give You Up", "Rick Astley", 196324, 521741),
            ("https://tabs.ultimate-guitar.com/tab/led-zeppelin/stairway-to-heaven-tabs-9488",
             "Stairway To Heaven", "Led Zeppelin", 31683, 9488),
            ("https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967",
             "Wenn Es Gut Ist", "Olli Schulz", 317511, 1381967),
            ("https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599",
             "In The Air Tonight", "Phil Collins", 138587, 880599),
            ("https://tabs.ultimate-guitar.com/tab/blink-182/feeling-this-bass-104175",
             "Feeling This", "Blink-182", 54209, 104175),
            ("https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995",
             "Empty Spaces", "Pink Floyd", 17357, 147995),
            ("https://tabs.ultimate-guitar.com/tab/367279",
             "Zu Spät", "Die Ärzte", 1577513, 367279),
        ];

        for (url, title, artist, song_id, tab_id) in test_sets {
            let result = get_basic_metadata(&get_raw_html(url).unwrap(), url).unwrap();
            assert_eq!(result.title, title);
            assert_eq!(result.artist, artist);
            assert_eq!(result.song_id, song_id);
            assert_eq!(result.tab_id, tab_id);
        }
    }

    /// Optional metadata is extracted where expected and absent otherwise.
    #[test]
    fn get_metadata() {
        let url_metadata_sets: Vec<(Option<SongMetaData>, &str)> = vec![
            (Some(SongMetaData {
                capo: Some(String::from("3")),
                tonality: None,
                tuning_name: Some(String::from("G C E A")),
                tuning: Some(String::from("G C E A")),
            }), "https://tabs.ultimate-guitar.com/tab/olli-schulz/wenn-es-gut-ist-ukulele-1381967"),
            (None, "https://tabs.ultimate-guitar.com/tab/pink-floyd/empty-spaces-bass-147995"),
            (None, "https://tabs.ultimate-guitar.com/tab/phil-collins/in-the-air-tonight-drums-880599"),
            (Some(SongMetaData {
                capo: Some(String::from("1")),
                tonality: None,
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/rick-astley/never-gonna-give-you-up-chords-521741"),
            (Some(SongMetaData {
                capo: None,
                tonality: Some(String::from("F")),
                tuning_name: Some(String::from("Standard")),
                tuning: Some(String::from("E A D G B E")),
            }), "https://tabs.ultimate-guitar.com/tab/queen/dont-stop-me-now-chords-519549"),
        ];
        for (expected, url) in url_metadata_sets {
            println!("Testing url: {}", url);
            // Comparing the whole `Option` covers both mismatch directions (unexpected
            // metadata and missing metadata) and reports both values on failure.
            assert_eq!(
                extract_metadata(&get_raw_html(url).unwrap()),
                expected,
                "unexpected metadata result for {}",
                url
            );
        }
    }
}