1use rand::seq::SliceRandom;
2use url::Url;
3
4use crate::cli::{DigArgs, SampleMode};
5use crate::error::{AppError, Result};
6use crate::fetch::Fetcher;
7use crate::model::{
8 CollectorsOutput, CrawlSummary, DigOutput, LibraryOutput, Platform, ResolveOutput,
9};
10use crate::parser;
11use crate::score::{rank_candidates, ScoreOptions};
12use crate::soundcloud;
13
14pub fn detect_platform(url: &str) -> Result<Platform> {
15 let parsed = Url::parse(url)?;
16 let Some(host) = parsed.host_str() else {
17 return Err(AppError::InvalidInput(format!(
18 "unsupported platform for URL: {url}"
19 )));
20 };
21
22 let host = host.to_ascii_lowercase();
23 if host == "bandcamp.com" || host.ends_with(".bandcamp.com") {
24 return Ok(Platform::Bandcamp);
25 }
26 if host == "soundcloud.com" || host.ends_with(".soundcloud.com") || host == "on.soundcloud.com"
27 {
28 return Ok(Platform::Soundcloud);
29 }
30
31 Err(AppError::InvalidInput(format!(
32 "unsupported platform for URL: {url}"
33 )))
34}
35
36pub async fn resolve_command(fetcher: &mut Fetcher, item_url: &str) -> Result<ResolveOutput> {
37 match detect_platform(item_url)? {
38 Platform::Bandcamp => resolve_bandcamp(fetcher, item_url).await,
39 Platform::Soundcloud => resolve_soundcloud(fetcher, item_url).await,
40 }
41}
42
43pub async fn collectors_command(fetcher: &mut Fetcher, args: &DigArgs) -> Result<CollectorsOutput> {
44 match detect_platform(&args.album_url)? {
45 Platform::Bandcamp => collectors_bandcamp(fetcher, args).await,
46 Platform::Soundcloud => Err(AppError::UnsupportedPlatformFeature {
47 platform: Platform::Soundcloud.as_str().to_string(),
48 feature: "collectors".to_string(),
49 }),
50 }
51}
52
53pub async fn library_command(
54 fetcher: &mut Fetcher,
55 fan_url: &str,
56 limit: usize,
57) -> Result<LibraryOutput> {
58 match detect_platform(fan_url)? {
59 Platform::Bandcamp => library_bandcamp(fetcher, fan_url, limit).await,
60 Platform::Soundcloud => Err(AppError::UnsupportedPlatformFeature {
61 platform: Platform::Soundcloud.as_str().to_string(),
62 feature: "library".to_string(),
63 }),
64 }
65}
66
67pub async fn dig_command(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
68 match detect_platform(&args.album_url)? {
69 Platform::Bandcamp => dig_bandcamp(fetcher, args).await,
70 Platform::Soundcloud => dig_soundcloud(fetcher, args).await,
71 }
72}
73
74async fn resolve_bandcamp(fetcher: &mut Fetcher, album_url: &str) -> Result<ResolveOutput> {
75 let normalized = parser::normalize_url(album_url)?;
76 let html = fetcher.fetch_text(&normalized).await?;
77 let seed = parser::resolve_seed(&normalized, &html)?;
78 Ok(ResolveOutput { seed })
79}
80
81async fn resolve_soundcloud(fetcher: &mut Fetcher, track_url: &str) -> Result<ResolveOutput> {
82 let normalized = soundcloud::normalize_url(track_url)?;
83 let html = fetcher.fetch_text(&normalized).await?;
84 let client_id = soundcloud::extract_client_id(&html)?;
85 let resolve_url = soundcloud::resolve_api_url(&client_id, &normalized)?;
86 let json = fetcher.fetch_text(&resolve_url).await?;
87 let seed = soundcloud::resolve_api_seed(&json)?;
88 Ok(ResolveOutput { seed })
89}
90
91async fn collectors_bandcamp(fetcher: &mut Fetcher, args: &DigArgs) -> Result<CollectorsOutput> {
92 let resolved = resolve_bandcamp(fetcher, &args.album_url).await?;
93 let html = fetcher.fetch_text(&resolved.seed.url).await?;
94 let collectors = sample_collectors(
95 parser::parse_collectors(&html),
96 args.max_collectors,
97 args.sample,
98 );
99 if collectors.is_empty() {
100 return Err(AppError::NoPublicData);
101 }
102
103 Ok(CollectorsOutput {
104 seed: resolved.seed,
105 collectors_discovered: collectors.len(),
106 collectors,
107 })
108}
109
110async fn library_bandcamp(
111 fetcher: &mut Fetcher,
112 fan_url: &str,
113 limit: usize,
114) -> Result<LibraryOutput> {
115 let normalized = parser::normalize_url(fan_url)?;
116 let html = fetcher.fetch_text(&normalized).await?;
117 let mut albums = parser::parse_owned_albums(&html);
118 albums.sort_by(|a, b| a.artist.cmp(&b.artist).then_with(|| a.title.cmp(&b.title)));
119 albums.truncate(limit);
120
121 if albums.is_empty() {
122 return Err(AppError::NoPublicData);
123 }
124
125 Ok(LibraryOutput {
126 collector_url: normalized,
127 albums,
128 })
129}
130
131async fn dig_bandcamp(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
132 let resolved = resolve_bandcamp(fetcher, &args.album_url).await?;
133 let seed_html = fetcher.fetch_text(&resolved.seed.url).await?;
134 let discovered_collectors = parser::parse_collectors(&seed_html);
135 let sampled_collectors = sample_collectors(
136 discovered_collectors.clone(),
137 args.max_collectors,
138 args.sample,
139 );
140
141 if sampled_collectors.is_empty() {
142 return Err(AppError::NoPublicData);
143 }
144
145 let mut collector_albums = Vec::new();
146 let mut collectors_scanned = 0usize;
147 let mut collectors_skipped = 0usize;
148
149 for collector in &sampled_collectors {
150 match library_bandcamp(fetcher, &collector.url, usize::MAX).await {
151 Ok(library) => {
152 collectors_scanned += 1;
153 collector_albums.push((collector.handle.clone(), library.albums));
154 }
155 Err(AppError::NoPublicData) => collectors_skipped += 1,
156 Err(_) => collectors_skipped += 1,
157 }
158 }
159
160 let results = rank_candidates(
161 &resolved.seed,
162 collector_albums,
163 &ScoreOptions {
164 min_overlap: args.min_overlap,
165 exclude_artist: args.exclude_artist,
166 exclude_label: args.exclude_label,
167 required_tags: args.tag.clone(),
168 source_label_plural: "collectors",
169 sort: args.sort,
170 limit: args.limit,
171 },
172 );
173
174 if results.is_empty() {
175 return Err(AppError::NoPublicData);
176 }
177
178 let summary = CrawlSummary {
179 collectors_discovered: discovered_collectors.len(),
180 collectors_sampled: sampled_collectors.len(),
181 collectors_scanned,
182 collectors_skipped,
183 candidates_ranked: results.len(),
184 cache_hits: fetcher.stats.hits,
185 cache_misses: fetcher.stats.misses,
186 };
187
188 Ok(DigOutput {
189 seed: resolved.seed,
190 summary,
191 results,
192 })
193}
194
195async fn dig_soundcloud(fetcher: &mut Fetcher, args: &DigArgs) -> Result<DigOutput> {
196 let normalized = soundcloud::normalize_url(&args.album_url)?;
197 let seed_html = fetcher.fetch_text(&normalized).await?;
198 let client_id = soundcloud::extract_client_id(&seed_html)?;
199 let resolve_url = soundcloud::resolve_api_url(&client_id, &normalized)?;
200 let resolve_json = fetcher.fetch_text(&resolve_url).await?;
201 let seed = soundcloud::resolve_api_seed(&resolve_json)?;
202
203 if seed.kind != crate::model::ItemKind::Track {
204 return Err(AppError::UnsupportedPlatformFeature {
205 platform: Platform::Soundcloud.as_str().to_string(),
206 feature: "playlist dig".to_string(),
207 });
208 }
209
210 let Some(seed_track_id) = seed.release_id.clone() else {
211 return Err(AppError::Parse(
212 "unable to determine SoundCloud track id".to_string(),
213 ));
214 };
215 let liker_limit = (args.max_collectors.saturating_mul(20))
216 .max(args.min_overlap)
217 .min(200);
218 let likers_url = soundcloud::likers_url(&client_id, &seed_track_id, liker_limit)?;
219 let likers_json = fetcher.fetch_text(&likers_url).await?;
220 let discovered_likers = soundcloud::parse_likers(&likers_json)?;
221 let discovered_count = discovered_likers.len();
222 let mut sampled_sources = discovered_likers;
223 if let SampleMode::Random = args.sample {
224 let mut rng = rand::thread_rng();
225 sampled_sources.shuffle(&mut rng);
226 }
227 if sampled_sources.is_empty() {
228 return Err(AppError::NoPublicData);
229 }
230
231 let mut source_tracks = Vec::new();
232 let mut likers_scanned = 0usize;
233 let mut likers_skipped = 0usize;
234 let mut likers_attempted = 0usize;
235
236 for liker in &sampled_sources {
237 if source_tracks.len() >= args.max_collectors {
238 break;
239 }
240
241 likers_attempted += 1;
242 let mut next_url = Some(soundcloud::user_likes_url(&client_id, &liker.id, 100)?);
243 let mut page_count = 0usize;
244 let mut found_source = false;
245
246 while let Some(url) = next_url.take() {
247 page_count += 1;
248 if page_count > 4 {
249 break;
250 }
251
252 let page_url = soundcloud::with_client_id(&url, &client_id)?;
253 let likes_json = fetcher.fetch_text(&page_url).await?;
254 match soundcloud::parse_user_likes_page(&likes_json, liker, &seed_track_id, 2) {
255 Ok(page) => {
256 if let Some(source) = page.source {
257 likers_scanned += 1;
258 source_tracks.push((source.title, source.tracks));
259 found_source = true;
260 break;
261 }
262 next_url = page.next_href;
263 }
264 Err(_) => {
265 next_url = None;
266 }
267 }
268 }
269
270 if !found_source {
271 likers_skipped += 1;
272 }
273 }
274
275 if source_tracks.is_empty() {
276 return Err(AppError::NoPublicData);
277 }
278
279 let results = rank_candidates(
280 &seed,
281 source_tracks,
282 &ScoreOptions {
283 min_overlap: args.min_overlap,
284 exclude_artist: args.exclude_artist,
285 exclude_label: args.exclude_label,
286 required_tags: args.tag.clone(),
287 source_label_plural: "likers",
288 sort: args.sort,
289 limit: args.limit,
290 },
291 );
292
293 if results.is_empty() {
294 return Err(AppError::NoPublicData);
295 }
296
297 let summary = CrawlSummary {
298 collectors_discovered: discovered_count,
299 collectors_sampled: likers_attempted,
300 collectors_scanned: likers_scanned,
301 collectors_skipped: likers_skipped,
302 candidates_ranked: results.len(),
303 cache_hits: fetcher.stats.hits,
304 cache_misses: fetcher.stats.misses,
305 };
306
307 Ok(DigOutput {
308 seed,
309 summary,
310 results,
311 })
312}
313
314fn sample_collectors(
315 mut collectors: Vec<crate::model::Collector>,
316 max_collectors: usize,
317 sample_mode: SampleMode,
318) -> Vec<crate::model::Collector> {
319 if let SampleMode::Random = sample_mode {
320 let mut rng = rand::thread_rng();
321 collectors.shuffle(&mut rng);
322 }
323
324 collectors.truncate(max_collectors);
325 collectors
326}
327
#[cfg(test)]
mod tests {
    use super::*;

    /// An artist subdomain of bandcamp.com is recognised as Bandcamp.
    #[test]
    fn detects_bandcamp_platform() {
        let detected = detect_platform("https://artist.bandcamp.com/album/test").unwrap();
        assert_eq!(detected, Platform::Bandcamp);
    }

    /// The bare soundcloud.com host is recognised as SoundCloud.
    #[test]
    fn detects_soundcloud_platform() {
        let detected = detect_platform("https://soundcloud.com/test-user/test-track").unwrap();
        assert_eq!(detected, Platform::Soundcloud);
    }
}