bytes_radar/net/
mod.rs

1pub mod providers;
2pub mod stream;
3pub mod traits;
4
5use crate::core::{analysis::ProjectAnalysis, error::Result, filter::IntelligentFilter};
6use providers::*;
7use reqwest::Client;
8use std::collections::HashMap;
9use std::sync::Arc;
10use traits::{GitProvider, NoOpProgressHook};
11
12pub use traits::{ParsedRepository, ProgressHook, ProviderConfig};
13
/// Remote repository analyzer with comprehensive configuration support
///
/// The RemoteAnalyzer supports multiple Git hosting providers and allows
/// extensive customization of HTTP requests, authentication, and processing behavior.
///
/// Configuration is layered: a global [`ProviderConfig`] serves as the base
/// for every provider, and optional per-provider configurations (keyed by
/// provider name) are merged on top of it.
///
/// # Examples
///
/// ```rust
/// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
///
/// // Basic usage
/// let mut analyzer = RemoteAnalyzer::new();
///
/// // With custom configuration
/// let config = ProviderConfig::new()
///     .with_timeout(120)
///     .with_header("X-Custom-Header", "value")
///     .with_credential("token", "your-token");
///
/// analyzer.set_global_config(config);
/// ```
pub struct RemoteAnalyzer {
    /// Registered providers; `analyze_url` consults them in this order.
    providers: Vec<Box<dyn GitProvider>>,
    /// Base configuration that every provider's effective config starts from.
    global_config: ProviderConfig,
    /// File filter handed to the tarball processing step.
    filter: IntelligentFilter,
    /// Receiver of download and processing progress callbacks.
    progress_hook: Arc<dyn ProgressHook>,
    /// Per-provider overrides, keyed by the provider's name.
    provider_configs: HashMap<String, ProviderConfig>,
}
42
43impl RemoteAnalyzer {
44    /// Create a new analyzer with default configuration
45    pub fn new() -> Self {
46        let mut analyzer = Self {
47            providers: Vec::new(),
48            global_config: ProviderConfig::default(),
49            filter: IntelligentFilter::default(),
50            progress_hook: Arc::new(NoOpProgressHook),
51            provider_configs: HashMap::new(),
52        };
53
54        analyzer.register_default_providers();
55        analyzer
56    }
57
58    /// Register all default Git providers
59    fn register_default_providers(&mut self) {
60        self.providers.push(Box::new(GitHubProvider::new()));
61        self.providers.push(Box::new(GitLabProvider::new()));
62        self.providers.push(Box::new(BitbucketProvider::new()));
63        self.providers.push(Box::new(CodebergProvider::new()));
64        self.providers.push(Box::new(GiteaProvider::new()));
65        self.providers.push(Box::new(SourceForgeProvider::new()));
66        self.providers.push(Box::new(AzureDevOpsProvider::new()));
67        self.providers.push(Box::new(ArchiveProvider::new()));
68    }
69
70    /// Set a progress hook for monitoring operations
71    ///
72    /// # Arguments
73    /// * `hook` - Progress hook implementation
74    ///
75    /// # Examples
76    /// ```rust
77    /// use bytes_radar::net::{RemoteAnalyzer, ProgressHook};
78    ///
79    /// struct MyHook;
80    /// impl ProgressHook for MyHook {
81    ///     fn on_download_progress(&self, downloaded: u64, total: Option<u64>) {
82    ///         println!("Downloaded: {} bytes", downloaded);
83    ///     }
84    ///     fn on_processing_start(&self, message: &str) {
85    ///         println!("Processing: {}", message);
86    ///     }
87    ///     fn on_processing_progress(&self, current: usize, total: usize) {
88    ///         println!("Progress: {}/{}", current, total);
89    ///     }
90    /// }
91    ///
92    /// let mut analyzer = RemoteAnalyzer::new();
93    /// analyzer.set_progress_hook(MyHook);
94    /// ```
95    pub fn set_progress_hook<H: ProgressHook + 'static>(&mut self, hook: H) {
96        self.progress_hook = Arc::new(hook);
97    }
98
99    /// Set global configuration that applies to all providers
100    ///
101    /// # Arguments
102    /// * `config` - Global configuration
103    ///
104    /// # Examples
105    /// ```rust
106    /// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
107    ///
108    /// let config = ProviderConfig::new()
109    ///     .with_timeout(300)
110    ///     .with_user_agent("my-app/1.0.0")
111    ///     .with_header("X-API-Key", "secret");
112    ///
113    /// let mut analyzer = RemoteAnalyzer::new();
114    /// analyzer.set_global_config(config);
115    /// ```
116    pub fn set_global_config(&mut self, config: ProviderConfig) {
117        self.global_config = config;
118        self.apply_config_to_providers();
119    }
120
121    /// Set configuration for a specific provider
122    ///
123    /// # Arguments
124    /// * `provider_name` - Name of the provider (e.g., "github", "gitlab")
125    /// * `config` - Provider-specific configuration
126    ///
127    /// # Examples
128    /// ```rust
129    /// use bytes_radar::net::{RemoteAnalyzer, ProviderConfig};
130    ///
131    /// let github_config = ProviderConfig::new()
132    ///     .with_credential("token", "github-token")
133    ///     .with_header("Accept", "application/vnd.github.v3+json");
134    ///
135    /// let mut analyzer = RemoteAnalyzer::new();
136    /// analyzer.set_provider_config("github", github_config);
137    /// ```
138    pub fn set_provider_config(&mut self, provider_name: &str, config: ProviderConfig) {
139        self.provider_configs
140            .insert(provider_name.to_string(), config);
141        self.apply_config_to_providers();
142    }
143
144    /// Apply configurations to all providers
145    fn apply_config_to_providers(&mut self) {
146        for provider in &mut self.providers {
147            let provider_name = provider.name();
148
149            // Start with global config
150            let mut config = self.global_config.clone();
151
152            // Override with provider-specific config if exists
153            if let Some(provider_config) = self.provider_configs.get(provider_name) {
154                // Merge configurations (provider-specific takes precedence)
155                config.headers.extend(provider_config.headers.clone());
156                config
157                    .credentials
158                    .extend(provider_config.credentials.clone());
159                config
160                    .provider_settings
161                    .extend(provider_config.provider_settings.clone());
162
163                if provider_config.timeout.is_some() {
164                    config.timeout = provider_config.timeout;
165                }
166                if provider_config.max_redirects.is_some() {
167                    config.max_redirects = provider_config.max_redirects;
168                }
169                if provider_config.user_agent.is_some() {
170                    config.user_agent = provider_config.user_agent.clone();
171                }
172                if provider_config.max_file_size.is_some() {
173                    config.max_file_size = provider_config.max_file_size;
174                }
175                if provider_config.proxy.is_some() {
176                    config.proxy = provider_config.proxy.clone();
177                }
178
179                config.accept_invalid_certs = provider_config.accept_invalid_certs;
180                config.use_compression = provider_config.use_compression;
181            }
182
183            provider.apply_config(&config);
184        }
185    }
186
187    /// Set file filtering configuration
188    ///
189    /// # Arguments
190    /// * `filter` - File filter configuration
191    pub fn set_filter(&mut self, filter: IntelligentFilter) {
192        self.filter = filter;
193    }
194
195    /// Enable or disable aggressive file filtering
196    ///
197    /// # Arguments
198    /// * `enabled` - Whether to enable aggressive filtering
199    pub fn set_aggressive_filtering(&mut self, enabled: bool) {
200        if enabled {
201            self.filter = IntelligentFilter::aggressive();
202        } else {
203            self.filter = IntelligentFilter::default();
204        }
205    }
206
207    // Legacy methods for backward compatibility
208
209    /// Set timeout for all providers (legacy method)
210    ///
211    /// # Arguments
212    /// * `timeout` - Timeout in seconds
213    pub fn set_timeout(&mut self, timeout: u64) {
214        self.global_config.timeout = Some(timeout);
215        self.apply_config_to_providers();
216    }
217
218    /// Set whether to accept invalid SSL certificates (legacy method)
219    ///
220    /// # Arguments
221    /// * `allow_insecure` - Whether to accept invalid certificates
222    pub fn set_allow_insecure(&mut self, allow_insecure: bool) {
223        self.global_config.accept_invalid_certs = allow_insecure;
224        self.apply_config_to_providers();
225    }
226
227    /// Set credentials for a specific provider (legacy method)
228    ///
229    /// # Arguments
230    /// * `provider_name` - Name of the provider
231    /// * `credentials` - Credentials map
232    pub fn set_provider_credentials(
233        &mut self,
234        provider_name: &str,
235        credentials: HashMap<String, String>,
236    ) {
237        let config = self
238            .provider_configs
239            .entry(provider_name.to_string())
240            .or_insert_with(ProviderConfig::default);
241
242        config.credentials.extend(credentials);
243        self.apply_config_to_providers();
244    }
245
246    /// Analyze a repository from its URL
247    ///
248    /// # Arguments
249    /// * `url` - Repository URL or shorthand notation
250    ///
251    /// # Examples
252    /// ```rust,no_run
253    /// use bytes_radar::net::RemoteAnalyzer;
254    ///
255    /// #[tokio::main]
256    /// async fn main() -> Result<(), Box<dyn std::error::Error>> {
257    ///     let analyzer = RemoteAnalyzer::new();
258    ///
259    ///     // Full URLs
260    ///     let analysis = analyzer.analyze_url("https://github.com/user/repo").await?;
261    ///
262    ///     // Shorthand notation
263    ///     let analysis = analyzer.analyze_url("user/repo@main").await?;
264    ///
265    ///     // Direct archive
266    ///     let analysis = analyzer.analyze_url("https://example.com/project.tar.gz").await?;
267    ///     
268    ///     Ok(())
269    /// }
270    /// ```
271    pub async fn analyze_url(&self, url: &str) -> Result<ProjectAnalysis> {
272        let expanded_url = self.expand_url(url);
273
274        // Try direct archive first for better performance
275        if expanded_url.ends_with(".tar.gz") || expanded_url.ends_with(".tgz") {
276            return self.analyze_direct_tarball(&expanded_url).await;
277        }
278
279        // Try each provider
280        for provider in &self.providers {
281            if provider.can_handle(&expanded_url) {
282                if let Some(parsed) = provider.parse_url(&expanded_url) {
283                    match self.analyze_with_provider(provider.as_ref(), &parsed).await {
284                        Ok(analysis) => return Ok(analysis),
285                        Err(e) => {
286                            #[cfg(feature = "cli")]
287                            log::debug!(
288                                "Provider {} failed for {}: {}",
289                                provider.name(),
290                                expanded_url,
291                                e
292                            );
293                            continue;
294                        }
295                    }
296                }
297            }
298        }
299
300        Err(crate::core::error::AnalysisError::url_parsing(format!(
301            "Unsupported URL format: {}. Supported formats include GitHub, GitLab, Bitbucket, Codeberg, Gitea, SourceForge, Azure DevOps, and direct archive URLs.",
302            expanded_url
303        )))
304    }
305
306    /// Analyze using a specific provider
307    async fn analyze_with_provider(
308        &self,
309        provider: &dyn GitProvider,
310        parsed: &ParsedRepository,
311    ) -> Result<ProjectAnalysis> {
312        let mut download_urls = provider.build_download_urls(parsed);
313
314        // If no URLs and no specific branch/commit, try common branches
315        if download_urls.is_empty() && parsed.branch_or_commit.is_none() {
316            let mut branches = vec![
317                "main".to_string(),
318                "master".to_string(),
319                "develop".to_string(),
320                "dev".to_string(),
321            ];
322
323            // Try to get default branch from API
324            #[cfg(not(target_arch = "wasm32"))]
325            {
326                let config = self.get_effective_config(provider.name());
327                if let Ok(client) = provider.build_client(&config) {
328                    if let Some(default_branch) = provider.get_default_branch(&client, parsed).await
329                    {
330                        branches.insert(0, default_branch);
331                        branches.dedup();
332                    }
333                }
334            }
335
336            // Generate URLs for each branch
337            for branch in branches {
338                let mut branch_parsed = parsed.clone();
339                branch_parsed.branch_or_commit = Some(branch);
340                download_urls.extend(provider.build_download_urls(&branch_parsed));
341            }
342        }
343
344        // Try each download URL
345        for download_url in download_urls {
346            match self
347                .analyze_direct_tarball_with_name(&download_url, &parsed.project_name)
348                .await
349            {
350                Ok(analysis) => return Ok(analysis),
351                Err(e) => {
352                    #[cfg(feature = "cli")]
353                    log::debug!("Failed to download from {}: {}", download_url, e);
354                    continue;
355                }
356            }
357        }
358
359        Err(crate::core::error::AnalysisError::network(
360            "All download URLs failed".to_string(),
361        ))
362    }
363
364    /// Get effective configuration for a provider
365    fn get_effective_config(&self, provider_name: &str) -> ProviderConfig {
366        let mut config = self.global_config.clone();
367
368        if let Some(provider_config) = self.provider_configs.get(provider_name) {
369            // Merge configurations
370            config.headers.extend(provider_config.headers.clone());
371            config
372                .credentials
373                .extend(provider_config.credentials.clone());
374            config
375                .provider_settings
376                .extend(provider_config.provider_settings.clone());
377
378            if provider_config.timeout.is_some() {
379                config.timeout = provider_config.timeout;
380            }
381            if provider_config.max_redirects.is_some() {
382                config.max_redirects = provider_config.max_redirects;
383            }
384            if provider_config.user_agent.is_some() {
385                config.user_agent = provider_config.user_agent.clone();
386            }
387            if provider_config.max_file_size.is_some() {
388                config.max_file_size = provider_config.max_file_size;
389            }
390            if provider_config.proxy.is_some() {
391                config.proxy = provider_config.proxy.clone();
392            }
393
394            config.accept_invalid_certs = provider_config.accept_invalid_certs;
395            config.use_compression = provider_config.use_compression;
396        }
397
398        config
399    }
400
401    /// Analyze a direct archive URL
402    async fn analyze_direct_tarball(&self, url: &str) -> Result<ProjectAnalysis> {
403        let project_name = self.extract_project_name_from_url(url);
404        self.analyze_direct_tarball_with_name(url, &project_name)
405            .await
406    }
407
408    /// Analyze a direct archive URL with custom project name
409    async fn analyze_direct_tarball_with_name(
410        &self,
411        url: &str,
412        project_name: &str,
413    ) -> Result<ProjectAnalysis> {
414        let mut project_analysis = ProjectAnalysis::new(project_name);
415
416        // Use global config to build client for direct downloads
417        let client = self.build_global_client()?;
418
419        let response = client.get(url).send().await.map_err(|e| {
420            crate::core::error::AnalysisError::network(format!("Failed to fetch URL: {}", e))
421        })?;
422
423        if !response.status().is_success() {
424            return Err(crate::core::error::AnalysisError::network(format!(
425                "HTTP request failed with status: {}",
426                response.status()
427            )));
428        }
429
430        let total_size = response.content_length();
431        self.progress_hook.on_download_progress(0, total_size);
432
433        let stream = response.bytes_stream();
434        let progress_hook = Arc::clone(&self.progress_hook);
435        let stream_reader = stream::StreamReader::new(
436            stream,
437            Box::new(move |downloaded, total| {
438                progress_hook.on_download_progress(downloaded, total);
439                log::debug!(
440                    "Downloaded: {} bytes of {} total",
441                    downloaded,
442                    total
443                        .map(|t| t.to_string())
444                        .unwrap_or_else(|| "unknown".to_string())
445                );
446            }),
447            total_size,
448        );
449
450        self.progress_hook.on_processing_start("Processing...");
451        stream::process_tarball_stream(
452            stream_reader,
453            &mut project_analysis,
454            &self.filter,
455            self.progress_hook.as_ref(),
456        )
457        .await?;
458
459        Ok(project_analysis)
460    }
461
462    /// Build HTTP client using global configuration
463    fn build_global_client(&self) -> Result<Client> {
464        // Use archive provider to build client (it has good defaults)
465        let archive_provider = ArchiveProvider::new();
466        archive_provider
467            .build_client(&self.global_config)
468            .map_err(|e| {
469                crate::core::error::AnalysisError::network(format!(
470                    "Failed to build HTTP client: {}",
471                    e
472                ))
473            })
474    }
475
476    /// Expand shorthand URLs to full URLs
477    fn expand_url(&self, url: &str) -> String {
478        if url.starts_with("http://") || url.starts_with("https://") {
479            return url.to_string();
480        }
481
482        // Handle shorthand notation like "user/repo@branch"
483        if url.contains('/') && !url.starts_with("http://") && !url.starts_with("https://") {
484            let parts: Vec<&str> = url.split('@').collect();
485            let repo_part = parts[0];
486            let branch_or_commit = parts.get(1);
487
488            let path_parts: Vec<&str> = repo_part.split('/').collect();
489            if path_parts.len() == 2 {
490                if let Some(branch) = branch_or_commit {
491                    // Check if it looks like a commit hash
492                    if branch.len() >= 7 && branch.chars().all(|c| c.is_ascii_hexdigit()) {
493                        return format!("https://github.com/{}/commit/{}", repo_part, branch);
494                    } else {
495                        return format!("https://github.com/{}/tree/{}", repo_part, branch);
496                    }
497                } else {
498                    return format!("https://github.com/{}", repo_part);
499                }
500            }
501        }
502
503        url.to_string()
504    }
505
506    /// Extract project name from a direct URL
507    fn extract_project_name_from_url(&self, url: &str) -> String {
508        let url_path = url.trim_end_matches('/');
509
510        if let Some(filename) = url_path.split('/').next_back() {
511            if filename.ends_with(".tar.gz") {
512                return filename.trim_end_matches(".tar.gz").to_string();
513            }
514            if filename.ends_with(".tgz") {
515                return filename.trim_end_matches(".tgz").to_string();
516            }
517            return filename.to_string();
518        }
519
520        "remote-project".to_string()
521    }
522}
523
524impl Default for RemoteAnalyzer {
525    fn default() -> Self {
526        Self::new()
527    }
528}