Skip to main content

wax/
provider.rs

1use rand::seq::SliceRandom;
2use url::Url;
3
4use crate::cli::{DigArgs, SampleMode};
5use crate::error::{AppError, Result};
6use crate::fetch::Fetcher;
7use crate::model::{
8    CollectorsOutput, CrawlSummary, DigOutput, LibraryOutput, Platform, ResolveOutput,
9};
10use crate::parser;
11use crate::score::{rank_candidates, ScoreOptions};
12use crate::soundcloud;
13
14pub fn detect_platform(url: &str) -> Result<Platform> {
15    let parsed = Url::parse(url)?;
16    let Some(host) = parsed.host_str() else {
17        return Err(AppError::InvalidInput(format!(
18            "unsupported platform for URL: {url}"
19        )));
20    };
21
22    let host = host.to_ascii_lowercase();
23    if host == "bandcamp.com" || host.ends_with(".bandcamp.com") {
24        return Ok(Platform::Bandcamp);
25    }
26    if host == "soundcloud.com" || host.ends_with(".soundcloud.com") || host == "on.soundcloud.com"
27    {
28        return Ok(Platform::Soundcloud);
29    }
30
31    Err(AppError::InvalidInput(format!(
32        "unsupported platform for URL: {url}"
33    )))
34}
35
36pub async fn resolve_command(fetcher: &mut Fetcher, item_url: &str) -> Result<ResolveOutput> {
37    match detect_platform(item_url)? {
38        Platform::Bandcamp => resolve_bandcamp(fetcher, item_url).await,
39        Platform::Soundcloud => resolve_soundcloud(fetcher, item_url).await,
40    }
41}
42
43pub async fn collectors_command(fetcher: &mut Fetcher, args: &DigArgs) -> Result<CollectorsOutput> {
44    match detect_platform(&args.album_url)? {
45        Platform::Bandcamp => collectors_bandcamp(fetcher, args).await,
46        Platform::Soundcloud => Err(AppError::UnsupportedPlatformFeature {
47            platform: Platform::Soundcloud.as_str().to_string(),
48            feature: "collectors".to_string(),
49        }),
50    }
51}
52
53pub async fn library_command(
54    fetcher: &mut Fetcher,
55    fan_url: &str,
56    limit: usize,
57) -> Result<LibraryOutput> {
58    match detect_platform(fan_url)? {
59        Platform::Bandcamp => library_bandcamp(fetcher, fan_url, limit).await,
60        Platform::Soundcloud => Err(AppError::UnsupportedPlatformFeature {
61            platform: Platform::Soundcloud.as_str().to_string(),
62            feature: "library".to_string(),
63        }),
64    }
65}
66
67pub async fn dig_command(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
68    match detect_platform(&args.album_url)? {
69        Platform::Bandcamp => dig_bandcamp(fetcher, args).await,
70        Platform::Soundcloud => dig_soundcloud(fetcher, args).await,
71    }
72}
73
74async fn resolve_bandcamp(fetcher: &mut Fetcher, album_url: &str) -> Result<ResolveOutput> {
75    let normalized = parser::normalize_url(album_url)?;
76    let html = fetcher.fetch_text(&normalized).await?;
77    let seed = parser::resolve_seed(&normalized, &html)?;
78    Ok(ResolveOutput { seed })
79}
80
81async fn resolve_soundcloud(fetcher: &mut Fetcher, track_url: &str) -> Result<ResolveOutput> {
82    let normalized = soundcloud::normalize_url(track_url)?;
83    let html = fetcher.fetch_text(&normalized).await?;
84    let client_id = soundcloud::extract_client_id(&html)?;
85    let resolve_url = soundcloud::resolve_api_url(&client_id, &normalized)?;
86    let json = fetcher.fetch_text(&resolve_url).await?;
87    let seed = soundcloud::resolve_api_seed(&json)?;
88    Ok(ResolveOutput { seed })
89}
90
91async fn collectors_bandcamp(fetcher: &mut Fetcher, args: &DigArgs) -> Result<CollectorsOutput> {
92    let resolved = resolve_bandcamp(fetcher, &args.album_url).await?;
93    let html = fetcher.fetch_text(&resolved.seed.url).await?;
94    let collectors = sample_collectors(
95        parser::parse_collectors(&html),
96        args.max_collectors,
97        args.sample,
98    );
99    if collectors.is_empty() {
100        return Err(AppError::NoPublicData);
101    }
102
103    Ok(CollectorsOutput {
104        seed: resolved.seed,
105        collectors_discovered: collectors.len(),
106        collectors,
107    })
108}
109
110async fn library_bandcamp(
111    fetcher: &mut Fetcher,
112    fan_url: &str,
113    limit: usize,
114) -> Result<LibraryOutput> {
115    let normalized = parser::normalize_url(fan_url)?;
116    let html = fetcher.fetch_text(&normalized).await?;
117    let mut albums = parser::parse_owned_albums(&html);
118    albums.sort_by(|a, b| a.artist.cmp(&b.artist).then_with(|| a.title.cmp(&b.title)));
119    albums.truncate(limit);
120
121    if albums.is_empty() {
122        return Err(AppError::NoPublicData);
123    }
124
125    Ok(LibraryOutput {
126        collector_url: normalized,
127        albums,
128    })
129}
130
131async fn dig_bandcamp(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
132    let resolved = resolve_bandcamp(fetcher, &args.album_url).await?;
133    let seed_html = fetcher.fetch_text(&resolved.seed.url).await?;
134    let discovered_collectors = parser::parse_collectors(&seed_html);
135    let sampled_collectors = sample_collectors(
136        discovered_collectors.clone(),
137        args.max_collectors,
138        args.sample,
139    );
140
141    if sampled_collectors.is_empty() {
142        return Err(AppError::NoPublicData);
143    }
144
145    let mut collector_albums = Vec::new();
146    let mut collectors_scanned = 0usize;
147    let mut collectors_skipped = 0usize;
148
149    for collector in &sampled_collectors {
150        match library_bandcamp(fetcher, &collector.url, usize::MAX).await {
151            Ok(library) => {
152                collectors_scanned += 1;
153                collector_albums.push((collector.handle.clone(), library.albums));
154            }
155            Err(AppError::NoPublicData) => collectors_skipped += 1,
156            Err(_) => collectors_skipped += 1,
157        }
158    }
159
160    let results = rank_candidates(
161        &resolved.seed,
162        collector_albums,
163        &ScoreOptions {
164            min_overlap: args.min_overlap,
165            exclude_artist: args.exclude_artist,
166            exclude_label: args.exclude_label,
167            required_tags: args.tag.clone(),
168            source_label_plural: "collectors",
169            sort: args.sort,
170            limit: args.limit,
171        },
172    );
173
174    if results.is_empty() {
175        return Err(AppError::NoPublicData);
176    }
177
178    let summary = CrawlSummary {
179        collectors_discovered: discovered_collectors.len(),
180        collectors_sampled: sampled_collectors.len(),
181        collectors_scanned,
182        collectors_skipped,
183        candidates_ranked: results.len(),
184        cache_hits: fetcher.stats.hits,
185        cache_misses: fetcher.stats.misses,
186    };
187
188    Ok(DigOutput {
189        seed: resolved.seed,
190        summary,
191        results,
192    })
193}
194
195async fn dig_soundcloud(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
196    let normalized = soundcloud::normalize_url(&args.album_url)?;
197    let seed_html = fetcher.fetch_text(&normalized).await?;
198    let client_id = soundcloud::extract_client_id(&seed_html)?;
199    let resolve_url = soundcloud::resolve_api_url(&client_id, &normalized)?;
200    let resolve_json = fetcher.fetch_text(&resolve_url).await?;
201    let seed = soundcloud::resolve_api_seed(&resolve_json)?;
202
203    if seed.kind != crate::model::ItemKind::Track {
204        return Err(AppError::UnsupportedPlatformFeature {
205            platform: Platform::Soundcloud.as_str().to_string(),
206            feature: "playlist dig".to_string(),
207        });
208    }
209
210    let Some(seed_track_id) = seed.release_id.clone() else {
211        return Err(AppError::Parse(
212            "unable to determine SoundCloud track id".to_string(),
213        ));
214    };
215    let liker_limit = (args.max_collectors.saturating_mul(20))
216        .max(args.min_overlap)
217        .min(200);
218    let likers_url = soundcloud::likers_url(&client_id, &seed_track_id, liker_limit)?;
219    let likers_json = fetcher.fetch_text(&likers_url).await?;
220    let discovered_likers = soundcloud::parse_likers(&likers_json)?;
221    let discovered_count = discovered_likers.len();
222    let mut sampled_sources = discovered_likers;
223    if let SampleMode::Random = args.sample {
224        let mut rng = rand::thread_rng();
225        sampled_sources.shuffle(&mut rng);
226    }
227    if sampled_sources.is_empty() {
228        return Err(AppError::NoPublicData);
229    }
230
231    let mut source_tracks = Vec::new();
232    let mut likers_scanned = 0usize;
233    let mut likers_skipped = 0usize;
234    let mut likers_attempted = 0usize;
235
236    for liker in &sampled_sources {
237        if source_tracks.len() >= args.max_collectors {
238            break;
239        }
240
241        likers_attempted += 1;
242        let mut next_url = Some(soundcloud::user_likes_url(&client_id, &liker.id, 100)?);
243        let mut page_count = 0usize;
244        let mut found_source = false;
245
246        while let Some(url) = next_url.take() {
247            page_count += 1;
248            if page_count > 4 {
249                break;
250            }
251
252            let page_url = soundcloud::with_client_id(&url, &client_id)?;
253            let likes_json = fetcher.fetch_text(&page_url).await?;
254            match soundcloud::parse_user_likes_page(&likes_json, liker, &seed_track_id, 2) {
255                Ok(page) => {
256                    if let Some(source) = page.source {
257                        likers_scanned += 1;
258                        source_tracks.push((source.title, source.tracks));
259                        found_source = true;
260                        break;
261                    }
262                    next_url = page.next_href;
263                }
264                Err(_) => {
265                    next_url = None;
266                }
267            }
268        }
269
270        if !found_source {
271            likers_skipped += 1;
272        }
273    }
274
275    if source_tracks.is_empty() {
276        return Err(AppError::NoPublicData);
277    }
278
279    let results = rank_candidates(
280        &seed,
281        source_tracks,
282        &ScoreOptions {
283            min_overlap: args.min_overlap,
284            exclude_artist: args.exclude_artist,
285            exclude_label: args.exclude_label,
286            required_tags: args.tag.clone(),
287            source_label_plural: "likers",
288            sort: args.sort,
289            limit: args.limit,
290        },
291    );
292
293    if results.is_empty() {
294        return Err(AppError::NoPublicData);
295    }
296
297    let summary = CrawlSummary {
298        collectors_discovered: discovered_count,
299        collectors_sampled: likers_attempted,
300        collectors_scanned: likers_scanned,
301        collectors_skipped: likers_skipped,
302        candidates_ranked: results.len(),
303        cache_hits: fetcher.stats.hits,
304        cache_misses: fetcher.stats.misses,
305    };
306
307    Ok(DigOutput {
308        seed,
309        summary,
310        results,
311    })
312}
313
314fn sample_collectors(
315    mut collectors: Vec<crate::model::Collector>,
316    max_collectors: usize,
317    sample_mode: SampleMode,
318) -> Vec<crate::model::Collector> {
319    if let SampleMode::Random = sample_mode {
320        let mut rng = rand::thread_rng();
321        collectors.shuffle(&mut rng);
322    }
323
324    collectors.truncate(max_collectors);
325    collectors
326}
327
#[cfg(test)]
mod tests {
    //! Unit tests for host-based platform detection.

    use super::*;

    #[test]
    fn detects_bandcamp_platform() {
        assert_eq!(
            detect_platform("https://artist.bandcamp.com/album/test").unwrap(),
            Platform::Bandcamp
        );
    }

    #[test]
    fn detects_bare_bandcamp_domain() {
        assert_eq!(
            detect_platform("https://bandcamp.com/some-fan").unwrap(),
            Platform::Bandcamp
        );
    }

    #[test]
    fn detects_soundcloud_platform() {
        assert_eq!(
            detect_platform("https://soundcloud.com/test-user/test-track").unwrap(),
            Platform::Soundcloud
        );
    }

    #[test]
    fn detects_soundcloud_subdomains_and_short_links() {
        for url in [
            "https://on.soundcloud.com/abc123",
            "https://m.soundcloud.com/test-user/test-track",
        ] {
            assert_eq!(detect_platform(url).unwrap(), Platform::Soundcloud);
        }
    }

    #[test]
    fn host_matching_ignores_case() {
        assert_eq!(
            detect_platform("https://Artist.BANDCAMP.com/album/test").unwrap(),
            Platform::Bandcamp
        );
    }

    #[test]
    fn rejects_lookalike_hosts() {
        // A platform domain appearing as a mere substring must not match.
        for url in [
            "https://notbandcamp.com/album/test",
            "https://bandcamp.com.example.org/album/test",
            "https://fakesoundcloud.com/test-user/test-track",
        ] {
            assert!(matches!(
                detect_platform(url),
                Err(AppError::InvalidInput(_))
            ));
        }
    }

    #[test]
    fn rejects_urls_without_host() {
        assert!(matches!(
            detect_platform("mailto:fan@bandcamp.com"),
            Err(AppError::InvalidInput(_))
        ));
    }

    #[test]
    fn rejects_unparseable_urls() {
        assert!(detect_platform("not a url").is_err());
    }
}