Skip to main content

bytes_radar/net/
mod.rs

1pub mod providers;
2pub mod stream;
3pub mod traits;
4
5use crate::core::{
6    analysis::ProjectAnalysis,
7    error::{AnalysisError, Result},
8    filter::IntelligentFilter,
9};
10use providers::*;
11use reqwest::Client;
12use std::collections::HashMap;
13use std::sync::Arc;
14use traits::{GitProvider, NoOpProgressHook};
15
16pub use traits::{ParsedRepository, ProgressHook, ProviderConfig};
17
18/// Remote repository analyzer with comprehensive configuration support
19///
20/// The RemoteAnalyzer supports multiple Git hosting providers and allows
21/// extensive customization of HTTP requests, authentication, and processing behavior.
22///
23/// # Examples
24///
25/// ```rust
26/// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
27///
28/// // Basic usage
29/// let mut analyzer = RemoteAnalyzer::new();
30///
31/// // With custom configuration
32/// let config = ProviderConfig::new()
33///     .with_timeout(120)
34///     .with_header("X-Custom-Header", "value")
35///     .with_credential("token", "your-token");
36///
37/// analyzer.set_global_config(config);
38/// ```
39pub struct RemoteAnalyzer {
40    providers: Vec<Box<dyn GitProvider>>,
41    global_config: ProviderConfig,
42    filter: IntelligentFilter,
43    progress_hook: Arc<dyn ProgressHook>,
44    provider_configs: HashMap<String, ProviderConfig>,
45}
46
47impl RemoteAnalyzer {
48    /// Create a new analyzer with default configuration
49    pub fn new() -> Self {
50        let mut analyzer = Self {
51            providers: Vec::new(),
52            global_config: ProviderConfig::default(),
53            filter: IntelligentFilter::default(),
54            progress_hook: Arc::new(NoOpProgressHook),
55            provider_configs: HashMap::new(),
56        };
57
58        analyzer.register_default_providers();
59        analyzer
60    }
61
62    /// Register all default Git providers
63    fn register_default_providers(&mut self) {
64        self.providers.push(Box::new(GitHubProvider::new()));
65        self.providers.push(Box::new(GitLabProvider::new()));
66        self.providers.push(Box::new(BitbucketProvider::new()));
67        self.providers.push(Box::new(CodebergProvider::new()));
68        self.providers.push(Box::new(GiteaProvider::new()));
69        self.providers.push(Box::new(SourceForgeProvider::new()));
70        self.providers.push(Box::new(AzureDevOpsProvider::new()));
71        self.providers.push(Box::new(ArchiveProvider::new()));
72    }
73
74    /// Set a progress hook for monitoring operations
75    ///
76    /// # Arguments
77    /// * `hook` - Progress hook implementation
78    ///
79    /// # Examples
80    /// ```rust
81    /// use bytes_radar::net::{RemoteAnalyzer, ProgressHook};
82    ///
83    /// struct MyHook;
84    /// impl ProgressHook for MyHook {
85    ///     fn on_download_progress(&self, downloaded: u64, total: Option<u64>) {
86    ///         println!("Downloaded: {} bytes", downloaded);
87    ///     }
88    ///     fn on_processing_start(&self, message: &str) {
89    ///         println!("Processing: {}", message);
90    ///     }
91    ///     fn on_processing_progress(&self, current: usize, total: usize) {
92    ///         println!("Progress: {}/{}", current, total);
93    ///     }
94    /// }
95    ///
96    /// let mut analyzer = RemoteAnalyzer::new();
97    /// analyzer.set_progress_hook(MyHook);
98    /// ```
99    pub fn set_progress_hook<H: ProgressHook + 'static>(&mut self, hook: H) {
100        self.progress_hook = Arc::new(hook);
101    }
102
103    /// Set global configuration that applies to all providers
104    ///
105    /// # Arguments
106    /// * `config` - Global configuration
107    ///
108    /// # Examples
109    /// ```rust
110    /// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
111    ///
112    /// let config = ProviderConfig::new()
113    ///     .with_timeout(300)
114    ///     .with_user_agent("my-app/1.0.0")
115    ///     .with_header("X-API-Key", "secret");
116    ///
117    /// let mut analyzer = RemoteAnalyzer::new();
118    /// analyzer.set_global_config(config);
119    /// ```
120    pub fn set_global_config(&mut self, config: ProviderConfig) {
121        self.global_config = config;
122        self.apply_config_to_providers();
123    }
124
125    /// Set configuration for a specific provider
126    ///
127    /// # Arguments
128    /// * `provider_name` - Name of the provider (e.g., "github", "gitlab")
129    /// * `config` - Provider-specific configuration
130    ///
131    /// # Examples
132    /// ```rust
133    /// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
134    ///
135    /// let github_config = ProviderConfig::new()
136    ///     .with_credential("token", "github-token")
137    ///     .with_header("Accept", "application/vnd.github.v3+json");
138    ///
139    /// let mut analyzer = RemoteAnalyzer::new();
140    /// analyzer.set_provider_config("github", github_config);
141    /// ```
142    pub fn set_provider_config(&mut self, provider_name: &str, config: ProviderConfig) {
143        self.provider_configs
144            .insert(provider_name.to_string(), config);
145        self.apply_config_to_providers();
146    }
147
148    /// Apply configurations to all providers
149    fn apply_config_to_providers(&mut self) {
150        for provider in &mut self.providers {
151            let provider_name = provider.name();
152
153            // Start with global config
154            let mut config = self.global_config.clone();
155
156            // Override with provider-specific config if exists
157            if let Some(provider_config) = self.provider_configs.get(provider_name) {
158                // Merge configurations (provider-specific takes precedence)
159                config.headers.extend(provider_config.headers.clone());
160                config
161                    .credentials
162                    .extend(provider_config.credentials.clone());
163                config
164                    .provider_settings
165                    .extend(provider_config.provider_settings.clone());
166
167                if provider_config.timeout.is_some() {
168                    config.timeout = provider_config.timeout;
169                }
170                if provider_config.max_redirects.is_some() {
171                    config.max_redirects = provider_config.max_redirects;
172                }
173                if provider_config.user_agent.is_some() {
174                    config.user_agent = provider_config.user_agent.clone();
175                }
176                if provider_config.max_file_size.is_some() {
177                    config.max_file_size = provider_config.max_file_size;
178                }
179                if provider_config.proxy.is_some() {
180                    config.proxy = provider_config.proxy.clone();
181                }
182
183                config.accept_invalid_certs = provider_config.accept_invalid_certs;
184                config.use_compression = provider_config.use_compression;
185            }
186
187            provider.apply_config(&config);
188        }
189    }
190
191    /// Set file filtering configuration
192    ///
193    /// # Arguments
194    /// * `filter` - File filter configuration
195    pub fn set_filter(&mut self, filter: IntelligentFilter) {
196        self.filter = filter;
197    }
198
199    /// Enable or disable aggressive file filtering
200    ///
201    /// # Arguments
202    /// * `enabled` - Whether to enable aggressive filtering
203    pub fn set_aggressive_filtering(&mut self, enabled: bool) {
204        if enabled {
205            self.filter = IntelligentFilter::aggressive();
206        } else {
207            self.filter = IntelligentFilter::default();
208        }
209    }
210
211    // Legacy methods for backward compatibility
212
213    /// Set timeout for all providers (legacy method)
214    ///
215    /// # Arguments
216    /// * `timeout` - Timeout in seconds
217    pub fn set_timeout(&mut self, timeout: u64) {
218        self.global_config.timeout = Some(timeout);
219        self.apply_config_to_providers();
220    }
221
222    /// Set whether to accept invalid SSL certificates (legacy method)
223    ///
224    /// # Arguments
225    /// * `allow_insecure` - Whether to accept invalid certificates
226    pub fn set_allow_insecure(&mut self, allow_insecure: bool) {
227        self.global_config.accept_invalid_certs = allow_insecure;
228        self.apply_config_to_providers();
229    }
230
231    /// Set credentials for a specific provider (legacy method)
232    ///
233    /// # Arguments
234    /// * `provider_name` - Name of the provider
235    /// * `credentials` - Credentials map
236    pub fn set_provider_credentials(
237        &mut self,
238        provider_name: &str,
239        credentials: HashMap<String, String>,
240    ) {
241        let config = self
242            .provider_configs
243            .entry(provider_name.to_string())
244            .or_insert_with(ProviderConfig::default);
245
246        config.credentials.extend(credentials);
247        self.apply_config_to_providers();
248    }
249
250    /// Analyze a repository from its URL
251    ///
252    /// # Arguments
253    /// * `url` - Repository URL or shorthand notation
254    ///
255    /// # Examples
256    /// ```rust,no_run
257    /// use bytes_radar::net::RemoteAnalyzer;
258    ///
259    /// #[tokio::main]
260    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
261    ///     let analyzer = RemoteAnalyzer::new();
262    ///
263    ///     // Full URLs
264    ///     let analysis = analyzer.analyze_url("https://github.com/user/repo").await?;
265    ///
266    ///     // Shorthand notation
267    ///     let analysis = analyzer.analyze_url("user/repo@main").await?;
268    ///
269    ///     // Direct archive
270    ///     let analysis = analyzer.analyze_url("https://example.com/project.tar.gz").await?;
271    ///     
272    ///     Ok(())
273    /// }
274    /// ```
275    pub async fn analyze_url(&self, url: &str) -> Result<ProjectAnalysis> {
276        let expanded_url = self.expand_url(url.trim());
277
278        // Try direct archive first for better performance
279        if expanded_url.ends_with(".tar.gz") || expanded_url.ends_with(".tgz") {
280            return self.analyze_direct_tarball(&expanded_url).await;
281        }
282
283        // Try each provider
284        for provider in &self.providers {
285            if provider.can_handle(&expanded_url) {
286                if let Some(parsed) = provider.parse_url(&expanded_url) {
287                    return self.analyze_with_provider(provider.as_ref(), &parsed).await;
288                }
289            }
290        }
291
292        Err(AnalysisError::url_parsing(format!(
293            "Unsupported URL format: {}. Supported formats include GitHub, GitLab, Bitbucket, Codeberg, Gitea, SourceForge, Azure DevOps, and direct archive URLs.",
294            expanded_url
295        )))
296    }
297
298    /// Analyze using a specific provider
299    async fn analyze_with_provider(
300        &self,
301        provider: &dyn GitProvider,
302        parsed: &ParsedRepository,
303    ) -> Result<ProjectAnalysis> {
304        let mut download_urls = provider.build_download_urls(parsed);
305
306        // If no URLs and no specific branch/commit, try common branches
307        if download_urls.is_empty() && parsed.branch_or_commit.is_none() {
308            let mut branches = vec![
309                "main".to_string(),
310                "master".to_string(),
311                "develop".to_string(),
312                "dev".to_string(),
313            ];
314
315            let config = self.get_effective_config(provider.name());
316            if let Ok(client) = provider.build_client(&config) {
317                if let Some(default_branch) = provider.get_default_branch(&client, parsed).await {
318                    branches.insert(0, default_branch);
319                    branches.dedup();
320                }
321            }
322
323            for branch in branches {
324                let mut branch_parsed = parsed.clone();
325                branch_parsed.branch_or_commit = Some(branch);
326                download_urls.extend(provider.build_download_urls(&branch_parsed));
327            }
328        }
329
330        let mut failed_reasons: Vec<String> = Vec::new();
331        for download_url in download_urls {
332            match self
333                .analyze_direct_tarball_with_name(&download_url, &parsed.project_name)
334                .await
335            {
336                Ok(analysis) => return Ok(analysis),
337                Err(e) => {
338                    let reason = format!("Failed to download from {}: {}", download_url, e);
339                    failed_reasons.push(reason.clone());
340
341                    #[cfg(target_arch = "wasm32")]
342                    {
343                        web_sys::console::log_1(&reason.into());
344                    }
345
346                    #[cfg(feature = "cli")]
347                    {
348                        log::debug!("Download failed: {}", reason);
349                    }
350
351                    continue;
352                }
353            }
354        }
355
356        Err(AnalysisError::network(format!(
357            "All download URLs failed. \n{}",
358            failed_reasons.join("\n")
359        )))
360    }
361
362    /// Get effective configuration for a provider
363    fn get_effective_config(&self, provider_name: &str) -> ProviderConfig {
364        let mut config = self.global_config.clone();
365
366        if let Some(provider_config) = self.provider_configs.get(provider_name) {
367            // Merge configurations
368            config.headers.extend(provider_config.headers.clone());
369            config
370                .credentials
371                .extend(provider_config.credentials.clone());
372            config
373                .provider_settings
374                .extend(provider_config.provider_settings.clone());
375
376            if provider_config.timeout.is_some() {
377                config.timeout = provider_config.timeout;
378            }
379            if provider_config.max_redirects.is_some() {
380                config.max_redirects = provider_config.max_redirects;
381            }
382            if provider_config.user_agent.is_some() {
383                config.user_agent = provider_config.user_agent.clone();
384            }
385            if provider_config.max_file_size.is_some() {
386                config.max_file_size = provider_config.max_file_size;
387            }
388            if provider_config.proxy.is_some() {
389                config.proxy = provider_config.proxy.clone();
390            }
391
392            config.accept_invalid_certs = provider_config.accept_invalid_certs;
393            config.use_compression = provider_config.use_compression;
394        }
395
396        config
397    }
398
399    /// Analyze a direct archive URL
400    async fn analyze_direct_tarball(&self, url: &str) -> Result<ProjectAnalysis> {
401        let project_name = self.extract_project_name_from_url(url);
402        self.analyze_direct_tarball_with_name(url, &project_name)
403            .await
404    }
405
406    /// Analyze a direct archive URL with custom project name
407    async fn analyze_direct_tarball_with_name(
408        &self,
409        url: &str,
410        project_name: &str,
411    ) -> Result<ProjectAnalysis> {
412        let mut project_analysis = ProjectAnalysis::new(project_name);
413
414        let client = self.build_global_client()?;
415
416        let response = client.get(url).send().await.map_err(|e| {
417            crate::core::error::AnalysisError::network(format!("Failed to fetch URL: {}", e))
418        })?;
419
420        if !response.status().is_success() {
421            return Err(crate::core::error::AnalysisError::network(format!(
422                "HTTP request failed with status: {}",
423                response.status()
424            )));
425        }
426
427        let total_size = response.content_length();
428        self.progress_hook.on_download_progress(0, total_size);
429
430        #[cfg(not(target_arch = "wasm32"))]
431        {
432            let stream = response.bytes_stream();
433            let progress_hook = Arc::clone(&self.progress_hook);
434            let stream_reader = stream::StreamReader::new(
435                stream,
436                Box::new(move |downloaded, total| {
437                    progress_hook.on_download_progress(downloaded, total);
438                    log::debug!(
439                        "Downloaded: {} bytes of {} total",
440                        downloaded,
441                        total
442                            .map(|t| t.to_string())
443                            .unwrap_or_else(|| "unknown".to_string())
444                    );
445                }),
446                total_size,
447            );
448
449            self.progress_hook.on_processing_start("Processing...");
450            stream::process_tarball_stream(
451                stream_reader,
452                &mut project_analysis,
453                &self.filter,
454                self.progress_hook.as_ref(),
455            )
456            .await?;
457        }
458
459        #[cfg(target_arch = "wasm32")]
460        {
461            let bytes = response.bytes().await.map_err(|e| {
462                crate::core::error::AnalysisError::network(format!(
463                    "Failed to read response bytes: {}",
464                    e
465                ))
466            })?;
467
468            self.progress_hook
469                .on_download_progress(bytes.len() as u64, total_size);
470            self.progress_hook.on_processing_start("Processing...");
471
472            stream::process_tarball(
473                bytes,
474                &mut project_analysis,
475                &self.filter,
476                self.progress_hook.as_ref(),
477            )
478            .await?;
479        }
480
481        Ok(project_analysis)
482    }
483
484    /// Build HTTP client using global configuration
485    fn build_global_client(&self) -> Result<Client> {
486        // Use archive provider to build client (it has good defaults)
487        let archive_provider = ArchiveProvider::new();
488        archive_provider
489            .build_client(&self.global_config)
490            .map_err(|e| {
491                crate::core::error::AnalysisError::network(format!(
492                    "Failed to build HTTP client: {}",
493                    e
494                ))
495            })
496    }
497
498    /// Expand shorthand URLs to full URLs
499    fn expand_url(&self, url: &str) -> String {
500        if url.starts_with("http://") || url.starts_with("https://") {
501            return url.to_string();
502        }
503
504        // Handle shorthand notation like "user/repo@branch"
505        if url.contains('/') && !url.starts_with("http://") && !url.starts_with("https://") {
506            let parts: Vec<&str> = url.split('@').collect();
507            let repo_part = parts[0];
508            let branch_or_commit = parts.get(1);
509
510            let path_parts: Vec<&str> = repo_part.split('/').collect();
511            if path_parts.len() == 2 {
512                if let Some(branch) = branch_or_commit {
513                    // Check if it looks like a commit hash
514                    if branch.len() >= 7 && branch.chars().all(|c| c.is_ascii_hexdigit()) {
515                        return format!("https://github.com/{}/commit/{}", repo_part, branch);
516                    } else {
517                        return format!("https://github.com/{}/tree/{}", repo_part, branch);
518                    }
519                } else {
520                    return format!("https://github.com/{}", repo_part);
521                }
522            }
523        }
524
525        url.to_string()
526    }
527
528    /// Extract project name from a direct URL
529    fn extract_project_name_from_url(&self, url: &str) -> String {
530        let url_path = url.trim_end_matches('/');
531
532        if let Some(filename) = url_path.split('/').next_back() {
533            if filename.ends_with(".tar.gz") {
534                return filename.trim_end_matches(".tar.gz").to_string();
535            }
536            if filename.ends_with(".tgz") {
537                return filename.trim_end_matches(".tgz").to_string();
538            }
539            return filename.to_string();
540        }
541
542        "remote-project".to_string()
543    }
544}
545
546impl Default for RemoteAnalyzer {
547    fn default() -> Self {
548        Self::new()
549    }
550}