1use crate::{Album, AlbumPage, LastFmError, Result, Track, TrackPage};
8use scraper::{Html, Selector};
9
/// Stateless parser that turns scraped HTML documents into [`Track`] and
/// [`Album`] values.
///
/// The type has no fields, so copies are free; `Clone` and `Copy` are
/// derived so callers can pass it by value without ceremony.
#[derive(Debug, Clone, Copy)]
pub struct LastFmParser;
16
17impl LastFmParser {
18 pub fn new() -> Self {
20 Self
21 }
22
23 pub fn parse_recent_scrobbles(&self, document: &Html) -> Result<Vec<Track>> {
26 let mut tracks = Vec::new();
27
28 let table_selector = Selector::parse("table.chartlist").unwrap();
30 let row_selector = Selector::parse("tbody tr").unwrap();
31
32 let tables: Vec<_> = document.select(&table_selector).collect();
33 log::debug!("Found {} chartlist tables", tables.len());
34
35 for table in tables {
36 for row in table.select(&row_selector) {
37 if let Ok(track) = self.parse_recent_scrobble_row(&row) {
38 tracks.push(track);
39 }
40 }
41 }
42
43 if tracks.is_empty() {
44 log::debug!("No tracks found in recent scrobbles");
45 }
46
47 log::debug!("Parsed {} recent scrobbles", tracks.len());
48 Ok(tracks)
49 }
50
51 fn parse_recent_scrobble_row(&self, row: &scraper::ElementRef) -> Result<Track> {
53 let name_selector = Selector::parse(".chartlist-name a").unwrap();
55 let name = row
56 .select(&name_selector)
57 .next()
58 .ok_or(LastFmError::Parse("Missing track name".to_string()))?
59 .text()
60 .collect::<String>()
61 .trim()
62 .to_string();
63
64 let artist_selector = Selector::parse(".chartlist-artist a").unwrap();
66 let artist = row
67 .select(&artist_selector)
68 .next()
69 .ok_or(LastFmError::Parse("Missing artist name".to_string()))?
70 .text()
71 .collect::<String>()
72 .trim()
73 .to_string();
74
75 let timestamp = self.extract_scrobble_timestamp(row);
77
78 let album = self.extract_scrobble_album(row);
80
81 let playcount = 1;
83
84 Ok(Track {
85 name,
86 artist,
87 playcount,
88 timestamp,
89 album,
90 })
91 }
92
93 fn extract_scrobble_timestamp(&self, row: &scraper::ElementRef) -> Option<u64> {
95 if let Some(timestamp_str) = row.value().attr("data-timestamp") {
99 if let Ok(timestamp) = timestamp_str.parse::<u64>() {
100 return Some(timestamp);
101 }
102 }
103
104 let timestamp_input_selector = Selector::parse("input[name='timestamp']").unwrap();
106 if let Some(input) = row.select(×tamp_input_selector).next() {
107 if let Some(value) = input.value().attr("value") {
108 if let Ok(timestamp) = value.parse::<u64>() {
109 return Some(timestamp);
110 }
111 }
112 }
113
114 let edit_form_selector =
116 Selector::parse("form[data-edit-scrobble] input[name='timestamp']").unwrap();
117 if let Some(timestamp_input) = row.select(&edit_form_selector).next() {
118 if let Some(value) = timestamp_input.value().attr("value") {
119 if let Ok(timestamp) = value.parse::<u64>() {
120 return Some(timestamp);
121 }
122 }
123 }
124
125 let time_selector = Selector::parse("time").unwrap();
127 if let Some(time_elem) = row.select(&time_selector).next() {
128 if let Some(datetime) = time_elem.value().attr("datetime") {
129 if let Ok(parsed_time) = chrono::DateTime::parse_from_rfc3339(datetime) {
131 return Some(parsed_time.timestamp() as u64);
132 }
133 }
134 }
135
136 None
137 }
138
139 fn extract_scrobble_album(&self, row: &scraper::ElementRef) -> Option<String> {
141 let album_input_selector =
143 Selector::parse("form[data-edit-scrobble] input[name='album_name']").unwrap();
144
145 if let Some(album_input) = row.select(&album_input_selector).next() {
146 if let Some(album_name) = album_input.value().attr("value") {
147 if !album_name.is_empty() {
148 return Some(album_name.to_string());
149 }
150 }
151 }
152
153 None
154 }
155
156 pub fn parse_tracks_page(
158 &self,
159 document: &Html,
160 page_number: u32,
161 artist: &str,
162 ) -> Result<TrackPage> {
163 let tracks = self.extract_tracks_from_document(document, artist)?;
164
165 let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
167
168 Ok(TrackPage {
169 tracks,
170 page_number,
171 has_next_page,
172 total_pages,
173 })
174 }
175
176 pub fn extract_tracks_from_document(
178 &self,
179 document: &Html,
180 artist: &str,
181 ) -> Result<Vec<Track>> {
182 let mut tracks = Vec::new();
183 let mut seen_tracks = std::collections::HashSet::new();
184
185 if let Ok(json_tracks) = self.parse_json_tracks_page(document, 1, artist) {
187 return Ok(json_tracks.tracks);
188 }
189
190 let track_selector = Selector::parse("[data-track-name]").unwrap();
192 let track_elements: Vec<_> = document.select(&track_selector).collect();
193
194 if !track_elements.is_empty() {
195 for element in track_elements {
196 let track_name = element.value().attr("data-track-name").unwrap_or("");
197 if !track_name.is_empty() && !seen_tracks.contains(track_name) {
198 seen_tracks.insert(track_name.to_string());
199
200 if let Ok(playcount) = self.find_playcount_for_track(document, track_name) {
201 let timestamp = self.find_timestamp_for_track(document, track_name);
202 let track = Track {
203 name: track_name.to_string(),
204 artist: artist.to_string(),
205 playcount,
206 timestamp,
207 album: None, };
209 tracks.push(track);
210 }
211 if tracks.len() >= 50 {
212 break;
213 }
214 }
215 }
216 }
217
218 if tracks.len() < 50 {
220 let form_input_selector = Selector::parse("input[name='track']").unwrap();
221 for input in document.select(&form_input_selector) {
222 if let Some(track_name) = input.value().attr("value") {
223 if !track_name.is_empty() && !seen_tracks.contains(track_name) {
224 seen_tracks.insert(track_name.to_string());
225
226 let playcount = self
227 .find_playcount_for_track(document, track_name)
228 .unwrap_or(0);
229 let timestamp = self.find_timestamp_for_track(document, track_name);
230 let track = Track {
231 name: track_name.to_string(),
232 artist: artist.to_string(),
233 playcount,
234 timestamp,
235 album: None, };
237 tracks.push(track);
238 if tracks.len() >= 50 {
239 break;
240 }
241 }
242 }
243 }
244 }
245
246 if tracks.len() < 10 {
248 let table_tracks = self.parse_tracks_from_rows(document, artist)?;
249 for track in table_tracks {
250 if !seen_tracks.contains(&track.name) && tracks.len() < 50 {
251 seen_tracks.insert(track.name.clone());
252 tracks.push(track);
253 }
254 }
255 }
256
257 log::debug!("Successfully extracted {} unique tracks", tracks.len());
258 Ok(tracks)
259 }
260
261 fn parse_tracks_from_rows(&self, document: &Html, artist: &str) -> Result<Vec<Track>> {
263 let mut tracks = Vec::new();
264 let table_selector = Selector::parse("table.chartlist").unwrap();
265 let row_selector = Selector::parse("tbody tr").unwrap();
266
267 for table in document.select(&table_selector) {
268 for row in table.select(&row_selector) {
269 if let Ok(mut track) = self.parse_track_row(&row) {
270 track.artist = artist.to_string(); tracks.push(track);
272 }
273 }
274 }
275 Ok(tracks)
276 }
277
278 pub fn parse_track_row(&self, row: &scraper::ElementRef) -> Result<Track> {
280 let name = self.extract_name_from_row(row, "track")?;
282
283 let playcount = self.extract_playcount_from_row(row);
285
286 let artist = "".to_string(); Ok(Track {
289 name,
290 artist,
291 playcount,
292 timestamp: None, album: None, })
295 }
296
297 pub fn parse_albums_page(
299 &self,
300 document: &Html,
301 page_number: u32,
302 artist: &str,
303 ) -> Result<AlbumPage> {
304 let mut albums = Vec::new();
305
306 let album_selector = Selector::parse("[data-album-name]").unwrap();
308 let album_elements: Vec<_> = document.select(&album_selector).collect();
309
310 if !album_elements.is_empty() {
311 log::debug!(
312 "Found {} album elements with data-album-name",
313 album_elements.len()
314 );
315
316 let mut seen_albums = std::collections::HashSet::new();
318
319 for element in album_elements {
320 let album_name = element.value().attr("data-album-name").unwrap_or("");
321 if !album_name.is_empty() && !seen_albums.contains(album_name) {
322 seen_albums.insert(album_name.to_string());
323
324 if let Ok(playcount) = self.find_playcount_for_album(document, album_name) {
325 let timestamp = self.find_timestamp_for_album(document, album_name);
326 let album = Album {
327 name: album_name.to_string(),
328 artist: artist.to_string(),
329 playcount,
330 timestamp,
331 };
332 albums.push(album);
333 }
334
335 if albums.len() >= 50 {
336 break;
337 }
338 }
339 }
340 } else {
341 albums = self.parse_albums_from_rows(document, artist)?;
343 }
344
345 let (has_next_page, total_pages) = self.parse_pagination(document, page_number)?;
346
347 Ok(AlbumPage {
348 albums,
349 page_number,
350 has_next_page,
351 total_pages,
352 })
353 }
354
355 fn parse_albums_from_rows(&self, document: &Html, artist: &str) -> Result<Vec<Album>> {
357 let mut albums = Vec::new();
358 let table_selector = Selector::parse("table.chartlist").unwrap();
359 let row_selector = Selector::parse("tbody tr").unwrap();
360
361 for table in document.select(&table_selector) {
362 for row in table.select(&row_selector) {
363 if let Ok(mut album) = self.parse_album_row(&row) {
364 album.artist = artist.to_string();
365 albums.push(album);
366 }
367 }
368 }
369 Ok(albums)
370 }
371
372 pub fn parse_album_row(&self, row: &scraper::ElementRef) -> Result<Album> {
374 let name = self.extract_name_from_row(row, "album")?;
376
377 let playcount = self.extract_playcount_from_row(row);
379
380 let artist = "".to_string(); Ok(Album {
383 name,
384 artist,
385 playcount,
386 timestamp: None, })
388 }
389
390 fn extract_name_from_row(&self, row: &scraper::ElementRef, item_type: &str) -> Result<String> {
394 let name_selector = Selector::parse(".chartlist-name a").unwrap();
395 let name = row
396 .select(&name_selector)
397 .next()
398 .map(|el| el.text().collect::<String>().trim().to_string())
399 .ok_or_else(|| LastFmError::Parse(format!("Missing {item_type} name")))?;
400 Ok(name)
401 }
402
403 fn extract_playcount_from_row(&self, row: &scraper::ElementRef) -> u32 {
405 let playcount_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
406 let mut playcount = 1; if let Some(element) = row.select(&playcount_selector).next() {
409 let text = element.text().collect::<String>().trim().to_string();
410 if let Some(number_part) = text.split_whitespace().next() {
412 if let Ok(count) = number_part.parse::<u32>() {
413 playcount = count;
414 }
415 }
416 }
417 playcount
418 }
419
420 pub fn parse_pagination(
422 &self,
423 document: &Html,
424 _current_page: u32,
425 ) -> Result<(bool, Option<u32>)> {
426 let pagination_selector = Selector::parse(".pagination-list").unwrap();
427
428 if let Some(pagination) = document.select(&pagination_selector).next() {
429 let next_selectors = [
431 "a[aria-label=\"Next\"]",
432 ".pagination-next a",
433 "a:contains(\"Next\")",
434 ".next a",
435 ];
436
437 let mut has_next = false;
438 for selector_str in &next_selectors {
439 if let Ok(selector) = Selector::parse(selector_str) {
440 if pagination.select(&selector).next().is_some() {
441 has_next = true;
442 break;
443 }
444 }
445 }
446
447 let total_pages = self.extract_total_pages_from_pagination(&pagination);
449
450 Ok((has_next, total_pages))
451 } else {
452 Ok((false, Some(1)))
454 }
455 }
456
457 fn extract_total_pages_from_pagination(&self, pagination: &scraper::ElementRef) -> Option<u32> {
459 let text = pagination.text().collect::<String>();
461 if let Some(of_pos) = text.find(" of ") {
462 let after_of = &text[of_pos + 4..];
463 if let Some(number_end) = after_of.find(|c: char| !c.is_ascii_digit()) {
464 if let Ok(total) = after_of[..number_end].parse::<u32>() {
465 return Some(total);
466 }
467 } else if let Ok(total) = after_of.trim().parse::<u32>() {
468 return Some(total);
469 }
470 }
471 None
472 }
473
474 fn parse_json_tracks_page(
477 &self,
478 _document: &Html,
479 _page: u32,
480 _artist: &str,
481 ) -> Result<TrackPage> {
482 Err(crate::LastFmError::Parse(
484 "JSON parsing not implemented".to_string(),
485 ))
486 }
487
488 pub fn find_timestamp_for_track(&self, _document: &Html, _track_name: &str) -> Option<u64> {
491 None
493 }
494
495 pub fn find_playcount_for_track(&self, document: &Html, track_name: &str) -> Result<u32> {
496 let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
498 let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();
499
500 for link in document.select(&link_selector) {
502 let link_text = link.text().collect::<String>().trim().to_string();
503 if link_text == track_name {
504 if let Some(row) = self.find_ancestor_row(link) {
505 if let Some(count_element) = row.select(&count_selector).next() {
506 let text = count_element.text().collect::<String>().trim().to_string();
507 if let Some(number_part) = text.split_whitespace().next() {
508 if let Ok(count) = number_part.parse::<u32>() {
509 return Ok(count);
510 }
511 }
512 }
513 }
514 }
515 }
516 Err(LastFmError::Parse(format!(
517 "Could not find playcount for track: {track_name}"
518 )))
519 }
520
521 pub fn find_timestamp_for_album(&self, _document: &Html, _album_name: &str) -> Option<u64> {
522 None
524 }
525
526 pub fn find_playcount_for_album(&self, document: &Html, album_name: &str) -> Result<u32> {
527 let count_selector = Selector::parse(".chartlist-count-bar-value").unwrap();
529 let link_selector = Selector::parse("a[href*=\"/music/\"]").unwrap();
530
531 for link in document.select(&link_selector) {
533 let link_text = link.text().collect::<String>().trim().to_string();
534 if link_text == album_name {
535 if let Some(row) = self.find_ancestor_row(link) {
536 if let Some(count_element) = row.select(&count_selector).next() {
537 let text = count_element.text().collect::<String>().trim().to_string();
538 if let Some(number_part) = text.split_whitespace().next() {
539 if let Ok(count) = number_part.parse::<u32>() {
540 return Ok(count);
541 }
542 }
543 }
544 }
545 }
546 }
547 Err(LastFmError::Parse(format!(
548 "Could not find playcount for album: {album_name}"
549 )))
550 }
551
552 pub fn find_ancestor_row<'a>(
553 &self,
554 element: scraper::ElementRef<'a>,
555 ) -> Option<scraper::ElementRef<'a>> {
556 let mut current = element;
557 while let Some(parent) = current.parent() {
558 if let Some(parent_elem) = scraper::ElementRef::wrap(parent) {
559 if parent_elem.value().name() == "tr" {
560 return Some(parent_elem);
561 }
562 current = parent_elem;
563 } else {
564 break;
565 }
566 }
567 None
568 }
569}
570
571impl Default for LastFmParser {
572 fn default() -> Self {
573 Self::new()
574 }
575}