1pub mod providers;
2pub mod stream;
3pub mod traits;
4
5use crate::core::{
6 analysis::ProjectAnalysis,
7 error::{AnalysisError, Result},
8 filter::IntelligentFilter,
9};
10use providers::*;
11use reqwest::Client;
12use std::collections::HashMap;
13use std::sync::Arc;
14use traits::{GitProvider, NoOpProgressHook};
15
16pub use traits::{ParsedRepository, ProgressHook, ProviderConfig};
17
18pub struct RemoteAnalyzer {
40 providers: Vec<Box<dyn GitProvider>>,
41 global_config: ProviderConfig,
42 filter: IntelligentFilter,
43 progress_hook: Arc<dyn ProgressHook>,
44 provider_configs: HashMap<String, ProviderConfig>,
45}
46
47impl RemoteAnalyzer {
48 pub fn new() -> Self {
50 let mut analyzer = Self {
51 providers: Vec::new(),
52 global_config: ProviderConfig::default(),
53 filter: IntelligentFilter::default(),
54 progress_hook: Arc::new(NoOpProgressHook),
55 provider_configs: HashMap::new(),
56 };
57
58 analyzer.register_default_providers();
59 analyzer
60 }
61
62 fn register_default_providers(&mut self) {
64 self.providers.push(Box::new(GitHubProvider::new()));
65 self.providers.push(Box::new(GitLabProvider::new()));
66 self.providers.push(Box::new(BitbucketProvider::new()));
67 self.providers.push(Box::new(CodebergProvider::new()));
68 self.providers.push(Box::new(GiteaProvider::new()));
69 self.providers.push(Box::new(SourceForgeProvider::new()));
70 self.providers.push(Box::new(AzureDevOpsProvider::new()));
71 self.providers.push(Box::new(ArchiveProvider::new()));
72 }
73
74 pub fn set_progress_hook<H: ProgressHook + 'static>(&mut self, hook: H) {
100 self.progress_hook = Arc::new(hook);
101 }
102
103 pub fn set_global_config(&mut self, config: ProviderConfig) {
121 self.global_config = config;
122 self.apply_config_to_providers();
123 }
124
125 pub fn set_provider_config(&mut self, provider_name: &str, config: ProviderConfig) {
143 self.provider_configs
144 .insert(provider_name.to_string(), config);
145 self.apply_config_to_providers();
146 }
147
148 fn apply_config_to_providers(&mut self) {
150 for provider in &mut self.providers {
151 let provider_name = provider.name();
152
153 let mut config = self.global_config.clone();
155
156 if let Some(provider_config) = self.provider_configs.get(provider_name) {
158 config.headers.extend(provider_config.headers.clone());
160 config
161 .credentials
162 .extend(provider_config.credentials.clone());
163 config
164 .provider_settings
165 .extend(provider_config.provider_settings.clone());
166
167 if provider_config.timeout.is_some() {
168 config.timeout = provider_config.timeout;
169 }
170 if provider_config.max_redirects.is_some() {
171 config.max_redirects = provider_config.max_redirects;
172 }
173 if provider_config.user_agent.is_some() {
174 config.user_agent = provider_config.user_agent.clone();
175 }
176 if provider_config.max_file_size.is_some() {
177 config.max_file_size = provider_config.max_file_size;
178 }
179 if provider_config.proxy.is_some() {
180 config.proxy = provider_config.proxy.clone();
181 }
182
183 config.accept_invalid_certs = provider_config.accept_invalid_certs;
184 config.use_compression = provider_config.use_compression;
185 }
186
187 provider.apply_config(&config);
188 }
189 }
190
191 pub fn set_filter(&mut self, filter: IntelligentFilter) {
196 self.filter = filter;
197 }
198
199 pub fn set_aggressive_filtering(&mut self, enabled: bool) {
204 if enabled {
205 self.filter = IntelligentFilter::aggressive();
206 } else {
207 self.filter = IntelligentFilter::default();
208 }
209 }
210
211 pub fn set_timeout(&mut self, timeout: u64) {
218 self.global_config.timeout = Some(timeout);
219 self.apply_config_to_providers();
220 }
221
222 pub fn set_allow_insecure(&mut self, allow_insecure: bool) {
227 self.global_config.accept_invalid_certs = allow_insecure;
228 self.apply_config_to_providers();
229 }
230
231 pub fn set_provider_credentials(
237 &mut self,
238 provider_name: &str,
239 credentials: HashMap<String, String>,
240 ) {
241 let config = self
242 .provider_configs
243 .entry(provider_name.to_string())
244 .or_insert_with(ProviderConfig::default);
245
246 config.credentials.extend(credentials);
247 self.apply_config_to_providers();
248 }
249
250 pub async fn analyze_url(&self, url: &str) -> Result<ProjectAnalysis> {
276 let expanded_url = self.expand_url(url.trim());
277
278 if expanded_url.ends_with(".tar.gz") || expanded_url.ends_with(".tgz") {
280 return self.analyze_direct_tarball(&expanded_url).await;
281 }
282
283 for provider in &self.providers {
285 if provider.can_handle(&expanded_url) {
286 if let Some(parsed) = provider.parse_url(&expanded_url) {
287 return self.analyze_with_provider(provider.as_ref(), &parsed).await;
288 }
289 }
290 }
291
292 Err(AnalysisError::url_parsing(format!(
293 "Unsupported URL format: {}. Supported formats include GitHub, GitLab, Bitbucket, Codeberg, Gitea, SourceForge, Azure DevOps, and direct archive URLs.",
294 expanded_url
295 )))
296 }
297
298 async fn analyze_with_provider(
300 &self,
301 provider: &dyn GitProvider,
302 parsed: &ParsedRepository,
303 ) -> Result<ProjectAnalysis> {
304 let mut download_urls = provider.build_download_urls(parsed);
305
306 if download_urls.is_empty() && parsed.branch_or_commit.is_none() {
308 let mut branches = vec![
309 "main".to_string(),
310 "master".to_string(),
311 "develop".to_string(),
312 "dev".to_string(),
313 ];
314
315 let config = self.get_effective_config(provider.name());
316 if let Ok(client) = provider.build_client(&config) {
317 if let Some(default_branch) = provider.get_default_branch(&client, parsed).await {
318 branches.insert(0, default_branch);
319 branches.dedup();
320 }
321 }
322
323 for branch in branches {
324 let mut branch_parsed = parsed.clone();
325 branch_parsed.branch_or_commit = Some(branch);
326 download_urls.extend(provider.build_download_urls(&branch_parsed));
327 }
328 }
329
330 let mut failed_reasons: Vec<String> = Vec::new();
331 for download_url in download_urls {
332 match self
333 .analyze_direct_tarball_with_name(&download_url, &parsed.project_name)
334 .await
335 {
336 Ok(analysis) => return Ok(analysis),
337 Err(e) => {
338 let reason = format!("Failed to download from {}: {}", download_url, e);
339 failed_reasons.push(reason.clone());
340
341 #[cfg(target_arch = "wasm32")]
342 {
343 web_sys::console::log_1(&reason.into());
344 }
345
346 #[cfg(feature = "cli")]
347 {
348 log::debug!("Download failed: {}", reason);
349 }
350
351 continue;
352 }
353 }
354 }
355
356 Err(AnalysisError::network(format!(
357 "All download URLs failed. \n{}",
358 failed_reasons.join("\n")
359 )))
360 }
361
362 fn get_effective_config(&self, provider_name: &str) -> ProviderConfig {
364 let mut config = self.global_config.clone();
365
366 if let Some(provider_config) = self.provider_configs.get(provider_name) {
367 config.headers.extend(provider_config.headers.clone());
369 config
370 .credentials
371 .extend(provider_config.credentials.clone());
372 config
373 .provider_settings
374 .extend(provider_config.provider_settings.clone());
375
376 if provider_config.timeout.is_some() {
377 config.timeout = provider_config.timeout;
378 }
379 if provider_config.max_redirects.is_some() {
380 config.max_redirects = provider_config.max_redirects;
381 }
382 if provider_config.user_agent.is_some() {
383 config.user_agent = provider_config.user_agent.clone();
384 }
385 if provider_config.max_file_size.is_some() {
386 config.max_file_size = provider_config.max_file_size;
387 }
388 if provider_config.proxy.is_some() {
389 config.proxy = provider_config.proxy.clone();
390 }
391
392 config.accept_invalid_certs = provider_config.accept_invalid_certs;
393 config.use_compression = provider_config.use_compression;
394 }
395
396 config
397 }
398
399 async fn analyze_direct_tarball(&self, url: &str) -> Result<ProjectAnalysis> {
401 let project_name = self.extract_project_name_from_url(url);
402 self.analyze_direct_tarball_with_name(url, &project_name)
403 .await
404 }
405
406 async fn analyze_direct_tarball_with_name(
408 &self,
409 url: &str,
410 project_name: &str,
411 ) -> Result<ProjectAnalysis> {
412 let mut project_analysis = ProjectAnalysis::new(project_name);
413
414 let client = self.build_global_client()?;
415
416 let response = client.get(url).send().await.map_err(|e| {
417 crate::core::error::AnalysisError::network(format!("Failed to fetch URL: {}", e))
418 })?;
419
420 if !response.status().is_success() {
421 return Err(crate::core::error::AnalysisError::network(format!(
422 "HTTP request failed with status: {}",
423 response.status()
424 )));
425 }
426
427 let total_size = response.content_length();
428 self.progress_hook.on_download_progress(0, total_size);
429
430 #[cfg(not(target_arch = "wasm32"))]
431 {
432 let stream = response.bytes_stream();
433 let progress_hook = Arc::clone(&self.progress_hook);
434 let stream_reader = stream::StreamReader::new(
435 stream,
436 Box::new(move |downloaded, total| {
437 progress_hook.on_download_progress(downloaded, total);
438 log::debug!(
439 "Downloaded: {} bytes of {} total",
440 downloaded,
441 total
442 .map(|t| t.to_string())
443 .unwrap_or_else(|| "unknown".to_string())
444 );
445 }),
446 total_size,
447 );
448
449 self.progress_hook.on_processing_start("Processing...");
450 stream::process_tarball_stream(
451 stream_reader,
452 &mut project_analysis,
453 &self.filter,
454 self.progress_hook.as_ref(),
455 )
456 .await?;
457 }
458
459 #[cfg(target_arch = "wasm32")]
460 {
461 let bytes = response.bytes().await.map_err(|e| {
462 crate::core::error::AnalysisError::network(format!(
463 "Failed to read response bytes: {}",
464 e
465 ))
466 })?;
467
468 self.progress_hook
469 .on_download_progress(bytes.len() as u64, total_size);
470 self.progress_hook.on_processing_start("Processing...");
471
472 stream::process_tarball(
473 bytes,
474 &mut project_analysis,
475 &self.filter,
476 self.progress_hook.as_ref(),
477 )
478 .await?;
479 }
480
481 Ok(project_analysis)
482 }
483
484 fn build_global_client(&self) -> Result<Client> {
486 let archive_provider = ArchiveProvider::new();
488 archive_provider
489 .build_client(&self.global_config)
490 .map_err(|e| {
491 crate::core::error::AnalysisError::network(format!(
492 "Failed to build HTTP client: {}",
493 e
494 ))
495 })
496 }
497
498 fn expand_url(&self, url: &str) -> String {
500 if url.starts_with("http://") || url.starts_with("https://") {
501 return url.to_string();
502 }
503
504 if url.contains('/') && !url.starts_with("http://") && !url.starts_with("https://") {
506 let parts: Vec<&str> = url.split('@').collect();
507 let repo_part = parts[0];
508 let branch_or_commit = parts.get(1);
509
510 let path_parts: Vec<&str> = repo_part.split('/').collect();
511 if path_parts.len() == 2 {
512 if let Some(branch) = branch_or_commit {
513 if branch.len() >= 7 && branch.chars().all(|c| c.is_ascii_hexdigit()) {
515 return format!("https://github.com/{}/commit/{}", repo_part, branch);
516 } else {
517 return format!("https://github.com/{}/tree/{}", repo_part, branch);
518 }
519 } else {
520 return format!("https://github.com/{}", repo_part);
521 }
522 }
523 }
524
525 url.to_string()
526 }
527
528 fn extract_project_name_from_url(&self, url: &str) -> String {
530 let url_path = url.trim_end_matches('/');
531
532 if let Some(filename) = url_path.split('/').next_back() {
533 if filename.ends_with(".tar.gz") {
534 return filename.trim_end_matches(".tar.gz").to_string();
535 }
536 if filename.ends_with(".tgz") {
537 return filename.trim_end_matches(".tgz").to_string();
538 }
539 return filename.to_string();
540 }
541
542 "remote-project".to_string()
543 }
544}
545
546impl Default for RemoteAnalyzer {
547 fn default() -> Self {
548 Self::new()
549 }
550}