bytes_radar/core/
net.rs

1use crate::core::{
2    analysis::{FileMetrics, ProjectAnalysis},
3    error::{AnalysisError, Result},
4    filter::{FilterStats, IntelligentFilter},
5    registry::LanguageRegistry,
6};
7use flate2::read::GzDecoder;
8use futures_util::StreamExt;
9use reqwest::Client;
10use serde::Deserialize;
11use std::io::{Cursor, Read};
12use tar::Archive;
13
14#[cfg(not(target_arch = "wasm32"))]
15use tokio::task;
16
/// `User-Agent` value set on every HTTP client this module builds.
17static USER_AGENT: &str = "bytes-radar/1.0.0";
18
/// Subset of the JSON returned by `https://api.github.com/repos/{owner}/{repo}`
/// that this module deserializes; only the default branch is read.
19#[derive(Deserialize)]
20struct GitHubRepoInfo {
    /// Name of the repository's default branch as reported by the API.
21    default_branch: String,
22}
23
24#[cfg(feature = "cli")]
25use indicatif::ProgressBar;
26
/// Resolves a repository reference or archive URL to one or more `.tar.gz`
/// download URLs, fetches the archive and analyzes the files it contains.
27pub struct RemoteAnalyzer {
    /// HTTP client used for every request; rebuilt whenever the token,
    /// timeout or TLS setting changes.
28    client: Client,
    /// Optional GitHub token, sent as an `Authorization: token <value>`
    /// default header when present.
29    github_token: Option<String>,
    /// Request timeout in seconds (only applied on non-wasm32 targets).
30    timeout: u64,
    /// When `true`, invalid TLS certificates are accepted (non-wasm32 only).
31    allow_insecure: bool,
    /// Filter consulted for each archive entry to decide whether it is analyzed.
32    filter: IntelligentFilter,
    /// Optional progress bar updated while downloading (CLI builds only).
33    #[cfg(feature = "cli")]
34    progress_bar: Option<ProgressBar>,
35}
36
37impl RemoteAnalyzer {
38    pub fn new() -> Self {
39        let mut builder = Client::builder().user_agent(USER_AGENT);
40
41        #[cfg(not(target_arch = "wasm32"))]
42        {
43            builder = builder.timeout(std::time::Duration::from_secs(300));
44        }
45
46        let client = builder.build().expect("Failed to create HTTP client");
47
48        Self {
49            client,
50            github_token: None,
51            timeout: 300,
52            allow_insecure: false,
53            filter: IntelligentFilter::default(),
54            #[cfg(feature = "cli")]
55            progress_bar: None,
56        }
57    }
58
/// Installs (or removes, with `None`) the progress bar stored on this
/// analyzer. Only available when the `cli` feature is enabled.
59    #[cfg(feature = "cli")]
60    pub fn set_progress_bar(&mut self, progress_bar: Option<ProgressBar>) {
61        self.progress_bar = progress_bar;
62    }
63
64    pub fn set_github_token(&mut self, token: &str) {
65        self.github_token = Some(token.to_string());
66        self.rebuild_client();
67    }
68
/// Sets the request timeout, in seconds, and rebuilds the HTTP client so the
/// new value takes effect.
69    pub fn set_timeout(&mut self, timeout: u64) {
70        self.timeout = timeout;
71        self.rebuild_client();
72    }
73
/// Enables or disables acceptance of invalid TLS certificates and rebuilds
/// the HTTP client so the setting takes effect.
74    pub fn set_allow_insecure(&mut self, allow_insecure: bool) {
75        self.allow_insecure = allow_insecure;
76        self.rebuild_client();
77    }
78
/// Replaces the file filter applied to archive entries. The HTTP client is
/// not rebuilt because the filter does not affect requests.
79    pub fn set_filter(&mut self, filter: IntelligentFilter) {
80        self.filter = filter;
81    }
82
83    pub fn set_aggressive_filtering(&mut self, enabled: bool) {
84        if enabled {
85            self.filter = IntelligentFilter::aggressive();
86        } else {
87            self.filter = IntelligentFilter::default();
88        }
89    }
90
91    fn rebuild_client(&mut self) {
92        let mut builder = Client::builder().user_agent(USER_AGENT);
93
94        #[cfg(not(target_arch = "wasm32"))]
95        {
96            builder = builder.timeout(std::time::Duration::from_secs(self.timeout));
97        }
98
99        #[cfg(not(target_arch = "wasm32"))]
100        if self.allow_insecure {
101            builder = builder.danger_accept_invalid_certs(true);
102        }
103
104        if let Some(token) = &self.github_token {
105            let mut headers = reqwest::header::HeaderMap::new();
106            let auth_value = format!("token {}", token);
107            headers.insert(
108                reqwest::header::AUTHORIZATION,
109                auth_value.parse().expect("Invalid token format"),
110            );
111            builder = builder.default_headers(headers);
112        }
113
114        self.client = builder.build().expect("Failed to create HTTP client");
115    }
116
117    pub async fn analyze_url(&self, url: &str) -> Result<ProjectAnalysis> {
118        let download_urls = self.resolve_git_url(url).await?;
119
120        let mut url_errors: Vec<crate::core::error::DownloadUrlError> = Vec::new();
121        let mut total_attempts = 0u32;
122        for download_url in download_urls {
123            total_attempts += 1;
124            match self.analyze_tarball_with_name(&download_url, url).await {
125                Ok(analysis) => return Ok(analysis),
126                Err(e) => {
127                    #[cfg(feature = "cli")]
128                    log::debug!("Failed to download from {}: {}", download_url, e);
129
130                    let error_info = crate::core::error::DownloadUrlError {
131                        url: download_url.clone(),
132                        error_message: format!("{}", e),
133                        error_type: match e {
134                            AnalysisError::NetworkError { .. } => "NetworkError".to_string(),
135                            AnalysisError::ArchiveError { .. } => "ArchiveError".to_string(),
136                            _ => "UnknownError".to_string(),
137                        },
138                        http_status_code: self.extract_http_status_code(&e),
139                        retry_count: 1,
140                    };
141
142                    url_errors.push(error_info);
143                    continue;
144                }
145            }
146        }
147
148        Err(AnalysisError::network(
149            "All download URLs failed".to_string(),
150        ))
151    }
152
153    async fn resolve_git_url(&self, url: &str) -> Result<Vec<String>> {
154        if url.ends_with(".tar.gz") || url.ends_with(".tgz") {
155            return Ok(vec![url.to_string()]);
156        }
157
158        let expanded_url = self.expand_url(url);
159
160        if expanded_url.starts_with("http://") || expanded_url.starts_with("https://") {
161            if !expanded_url.contains("github.com")
162                && !expanded_url.contains("gitlab.com")
163                && !expanded_url.contains("gitlab.")
164                && !expanded_url.contains("bitbucket.org")
165                && !expanded_url.contains("codeberg.org")
166            {
167                if expanded_url.ends_with(".tar.gz") || expanded_url.ends_with(".tgz") {
168                    return Ok(vec![expanded_url.to_string()]);
169                } else {
170                    return Ok(vec![expanded_url.to_string()]);
171                }
172            }
173        }
174
175        let mut download_urls = Vec::new();
176
177        if let Some(github_url) = self.parse_github_url_with_branch(&expanded_url) {
178            download_urls.push(github_url);
179        }
180
181        if let Some(gitlab_url) = self.parse_gitlab_url_with_branch(&expanded_url) {
182            download_urls.push(gitlab_url);
183        }
184
185        if let Some(bitbucket_url) = self.parse_bitbucket_url_with_branch(&expanded_url) {
186            download_urls.push(bitbucket_url);
187        }
188
189        if let Some(codeberg_url) = self.parse_codeberg_url_with_branch(&expanded_url) {
190            download_urls.push(codeberg_url);
191        }
192
193        if download_urls.is_empty() {
194            let mut branches = vec![
195                "main".to_string(),
196                "master".to_string(),
197                "develop".to_string(),
198                "dev".to_string(),
199            ];
200
201            #[cfg(not(target_arch = "wasm32"))]
202            if expanded_url.contains("github.com") {
203                if let Some(default_branch) = self.get_github_default_branch(&expanded_url).await {
204                    branches.insert(0, default_branch);
205                    branches.dedup();
206                }
207            }
208
209            #[cfg(target_arch = "wasm32")]
210            if expanded_url.contains("github.com") {
211                branches = vec![
212                    "main".to_string(),
213                    "master".to_string(),
214                    "develop".to_string(),
215                    "dev".to_string(),
216                ];
217            }
218
219            for branch in &branches {
220                if let Some(github_url) = self.parse_github_url(&expanded_url, branch) {
221                    download_urls.push(github_url);
222                }
223
224                if let Some(gitlab_url) = self.parse_gitlab_url(&expanded_url, branch) {
225                    download_urls.push(gitlab_url);
226                }
227
228                if let Some(bitbucket_url) = self.parse_bitbucket_url(&expanded_url, branch) {
229                    download_urls.push(bitbucket_url);
230                }
231
232                if let Some(codeberg_url) = self.parse_codeberg_url(&expanded_url, branch) {
233                    download_urls.push(codeberg_url);
234                }
235            }
236        }
237
238        if download_urls.is_empty() {
239            return Err(AnalysisError::url_parsing(format!(
240                "Unsupported URL format or no accessible branch found: {}. Please provide a direct tar.gz URL or a supported repository URL.",
241                expanded_url
242            )));
243        }
244
245        Ok(download_urls)
246    }
247
248    fn parse_github_url_with_branch(&self, url: &str) -> Option<String> {
249        if url.contains("github.com") {
250            if url.contains("/tree/") {
251                let parts: Vec<&str> = url.split('/').collect();
252                if let Some(tree_pos) = parts.iter().position(|&x| x == "tree") {
253                    if tree_pos + 1 < parts.len() && tree_pos >= 2 {
254                        let owner = parts[tree_pos - 2];
255                        let repo = parts[tree_pos - 1];
256                        let branch = parts[tree_pos + 1];
257                        return Some(format!(
258                            "https://github.com/{}/{}/archive/refs/heads/{}.tar.gz",
259                            owner, repo, branch
260                        ));
261                    }
262                }
263            }
264
265            if url.contains("/commit/") {
266                return self.extract_github_commit_url(url);
267            }
268        }
269        None
270    }
271
272    fn parse_gitlab_url_with_branch(&self, url: &str) -> Option<String> {
273        if url.contains("gitlab.com") || url.contains("gitlab.") {
274            if url.contains("/-/tree/") {
275                let parts: Vec<&str> = url.split('/').collect();
276                if let Some(tree_pos) = parts.iter().position(|&x| x == "tree") {
277                    if tree_pos + 1 < parts.len() && tree_pos >= 3 {
278                        let gitlab_pos = parts.iter().position(|&x| x.contains("gitlab")).unwrap();
279                        let host = parts[gitlab_pos];
280                        let owner = parts[gitlab_pos + 1];
281                        let repo = parts[gitlab_pos + 2];
282                        let branch = parts[tree_pos + 1];
283                        return Some(format!(
284                            "https://{}/{}{}/-/archive/{}/{}-{}.tar.gz",
285                            host,
286                            owner,
287                            if parts.len() > gitlab_pos + 3 && parts[gitlab_pos + 3] != "-" {
288                                format!("/{}", parts[gitlab_pos + 3..tree_pos - 1].join("/"))
289                            } else {
290                                String::new()
291                            },
292                            branch,
293                            repo,
294                            branch
295                        ));
296                    }
297                }
298            }
299        }
300        None
301    }
302
303    fn parse_bitbucket_url_with_branch(&self, url: &str) -> Option<String> {
304        if url.contains("bitbucket.org") {
305            if url.contains("/commits/") {
306                let parts: Vec<&str> = url.split('/').collect();
307                if let Some(commits_pos) = parts.iter().position(|&x| x == "commits") {
308                    if commits_pos + 1 < parts.len() && commits_pos >= 2 {
309                        let owner = parts[commits_pos - 2];
310                        let repo = parts[commits_pos - 1];
311                        let commit = parts[commits_pos + 1];
312                        return Some(format!(
313                            "https://bitbucket.org/{}/{}/get/{}.tar.gz",
314                            owner, repo, commit
315                        ));
316                    }
317                }
318            }
319
320            if url.contains("/branch/") {
321                let parts: Vec<&str> = url.split('/').collect();
322                if let Some(branch_pos) = parts.iter().position(|&x| x == "branch") {
323                    if branch_pos + 1 < parts.len() && branch_pos >= 2 {
324                        let owner = parts[branch_pos - 2];
325                        let repo = parts[branch_pos - 1];
326                        let branch = parts[branch_pos + 1];
327                        return Some(format!(
328                            "https://bitbucket.org/{}/{}/get/{}.tar.gz",
329                            owner, repo, branch
330                        ));
331                    }
332                }
333            }
334        }
335        None
336    }
337
338    fn parse_codeberg_url_with_branch(&self, url: &str) -> Option<String> {
339        if url.contains("codeberg.org") {
340            if url.contains("/commit/") {
341                let parts: Vec<&str> = url.split('/').collect();
342                if let Some(commit_pos) = parts.iter().position(|&x| x == "commit") {
343                    if commit_pos + 1 < parts.len() && commit_pos >= 2 {
344                        let owner = parts[commit_pos - 2];
345                        let repo = parts[commit_pos - 1];
346                        let commit = parts[commit_pos + 1];
347                        return Some(format!(
348                            "https://codeberg.org/{}/{}/archive/{}.tar.gz",
349                            owner, repo, commit
350                        ));
351                    }
352                }
353            }
354
355            if url.contains("/src/branch/") {
356                let parts: Vec<&str> = url.split('/').collect();
357                if let Some(branch_pos) = parts.iter().position(|&x| x == "branch") {
358                    if branch_pos + 1 < parts.len() && branch_pos >= 3 {
359                        let owner = parts[branch_pos - 3];
360                        let repo = parts[branch_pos - 2];
361                        let branch = parts[branch_pos + 1];
362                        return Some(format!(
363                            "https://codeberg.org/{}/{}/archive/{}.tar.gz",
364                            owner, repo, branch
365                        ));
366                    }
367                }
368            }
369        }
370        None
371    }
372
373    fn parse_bitbucket_url(&self, url: &str, branch: &str) -> Option<String> {
374        if url.contains("bitbucket.org") {
375            let parts: Vec<&str> = url.split('/').collect();
376            if let Some(bitbucket_pos) = parts.iter().position(|&x| x == "bitbucket.org") {
377                if bitbucket_pos + 2 < parts.len() {
378                    let owner = parts[bitbucket_pos + 1];
379                    let repo = parts[bitbucket_pos + 2];
380                    return Some(format!(
381                        "https://bitbucket.org/{}/{}/get/{}.tar.gz",
382                        owner, repo, branch
383                    ));
384                }
385            }
386        }
387        None
388    }
389
390    fn parse_codeberg_url(&self, url: &str, branch: &str) -> Option<String> {
391        if url.contains("codeberg.org") {
392            let parts: Vec<&str> = url.split('/').collect();
393            if let Some(codeberg_pos) = parts.iter().position(|&x| x == "codeberg.org") {
394                if codeberg_pos + 2 < parts.len() {
395                    let owner = parts[codeberg_pos + 1];
396                    let repo = parts[codeberg_pos + 2];
397                    return Some(format!(
398                        "https://codeberg.org/{}/{}/archive/{}.tar.gz",
399                        owner, repo, branch
400                    ));
401                }
402            }
403        }
404        None
405    }
406
407    async fn check_url_exists(&self, url: &str) -> bool {
408        if let Ok(response) = self.client.head(url).send().await {
409            response.status().is_success()
410        } else {
411            false
412        }
413    }
414
415    fn extract_http_status_code(&self, error: &AnalysisError) -> Option<u16> {
416        match error {
417            AnalysisError::NetworkError { message } => {
418                if message.contains("HTTP request failed with status: ") {
419                    if let Some(start) = message.find("HTTP request failed with status: ") {
420                        let status_start = start + "HTTP request failed with status: ".len();
421                        let status_str = &message[status_start..];
422                        if let Some(end) = status_str.find(' ') {
423                            status_str[..end].parse().ok()
424                        } else {
425                            status_str.parse().ok()
426                        }
427                    } else {
428                        None
429                    }
430                } else {
431                    None
432                }
433            }
434            _ => None,
435        }
436    }
437
438    fn expand_url(&self, url: &str) -> String {
439        if url.starts_with("http://") || url.starts_with("https://") {
440            return url.to_string();
441        }
442
443        if url.contains('/') && !url.starts_with("http://") && !url.starts_with("https://") {
444            let parts: Vec<&str> = url.split('@').collect();
445            let repo_part = parts[0];
446            let branch_or_commit = parts.get(1);
447
448            let path_parts: Vec<&str> = repo_part.split('/').collect();
449            if path_parts.len() == 2 {
450                if let Some(branch) = branch_or_commit {
451                    if branch.len() >= 7 && branch.chars().all(|c| c.is_ascii_hexdigit()) {
452                        return format!("https://github.com/{}/commit/{}", repo_part, branch);
453                    } else {
454                        return format!("https://github.com/{}/tree/{}", repo_part, branch);
455                    }
456                } else {
457                    return format!("https://github.com/{}", repo_part);
458                }
459            }
460        }
461
462        url.to_string()
463    }
464
465    #[cfg(not(target_arch = "wasm32"))]
466    async fn get_github_default_branch(&self, url: &str) -> Option<String> {
467        let (owner, repo) = self.extract_github_owner_repo(url)?;
468
469        let api_url = format!("https://api.github.com/repos/{}/{}", owner, repo);
470
471        match self.client.get(&api_url).send().await {
472            Ok(response) => {
473                if response.status().is_success() {
474                    match response.json::<GitHubRepoInfo>().await {
475                        Ok(repo_info) => {
476                            #[cfg(feature = "cli")]
477                            log::debug!(
478                                "GitHub API: Found default branch '{}' for {}/{}",
479                                repo_info.default_branch,
480                                owner,
481                                repo
482                            );
483                            Some(repo_info.default_branch)
484                        }
485                        Err(_) => {
486                            #[cfg(feature = "cli")]
487                            log::debug!(
488                                "GitHub API: Failed to parse response for {}/{}",
489                                owner,
490                                repo
491                            );
492                            None
493                        }
494                    }
495                } else {
496                    #[cfg(feature = "cli")]
497                    log::debug!(
498                        "GitHub API: Request failed with status {} for {}/{}",
499                        response.status(),
500                        owner,
501                        repo
502                    );
503                    None
504                }
505            }
506            Err(_) => {
507                #[cfg(feature = "cli")]
508                log::debug!("GitHub API: Network error for {}/{}", owner, repo);
509                None
510            }
511        }
512    }
513
514    fn extract_github_owner_repo(&self, url: &str) -> Option<(String, String)> {
515        let url = url.trim_end_matches('/');
516
517        if let Some(github_url) = url.strip_prefix("https://github.com/") {
518            let parts: Vec<&str> = github_url.split('/').collect();
519            if parts.len() >= 2 {
520                return Some((parts[0].to_string(), parts[1].to_string()));
521            }
522        }
523
524        if url.contains("github.com") {
525            let parts: Vec<&str> = url.split('/').collect();
526            if let Some(github_pos) = parts.iter().position(|&x| x == "github.com") {
527                if github_pos + 2 < parts.len() {
528                    return Some((
529                        parts[github_pos + 1].to_string(),
530                        parts[github_pos + 2].to_string(),
531                    ));
532                }
533            }
534        }
535
536        let parts: Vec<&str> = url.split('@').collect();
537        let repo_part = parts[0];
538        let path_parts: Vec<&str> = repo_part.split('/').collect();
539        if path_parts.len() == 2 {
540            return Some((path_parts[0].to_string(), path_parts[1].to_string()));
541        }
542
543        None
544    }
545
546    fn parse_github_url(&self, url: &str, branch: &str) -> Option<String> {
547        let url = url.trim_end_matches('/');
548
549        if url.contains("github.com") {
550            if let Some(commit_url) = self.extract_github_commit_url(url) {
551                return Some(commit_url);
552            }
553
554            if let Some(repo_url) = self.extract_github_repo_url(url, branch) {
555                return Some(repo_url);
556            }
557        }
558
559        None
560    }
561
562    fn extract_github_commit_url(&self, url: &str) -> Option<String> {
563        if url.contains("/commit/") {
564            let parts: Vec<&str> = url.split('/').collect();
565            if let Some(commit_pos) = parts.iter().position(|&x| x == "commit") {
566                if commit_pos + 1 < parts.len() {
567                    let owner = parts.get(parts.len() - 4)?;
568                    let repo = parts.get(parts.len() - 3)?;
569                    let commit = parts.get(commit_pos + 1)?;
570                    return Some(format!(
571                        "https://github.com/{}/{}/archive/{}.tar.gz",
572                        owner, repo, commit
573                    ));
574                }
575            }
576        }
577        None
578    }
579
580    fn extract_github_repo_url(&self, url: &str, branch: &str) -> Option<String> {
581        let parts: Vec<&str> = url.split('/').collect();
582        if parts.len() >= 2 && parts.contains(&"github.com") {
583            if let Some(github_pos) = parts.iter().position(|&x| x == "github.com") {
584                if github_pos + 2 < parts.len() {
585                    let owner = parts[github_pos + 1];
586                    let repo = parts[github_pos + 2];
587                    return Some(format!(
588                        "https://github.com/{}/{}/archive/refs/heads/{}.tar.gz",
589                        owner, repo, branch
590                    ));
591                }
592            }
593        }
594        None
595    }
596
597    fn parse_gitlab_url(&self, url: &str, branch: &str) -> Option<String> {
598        let url = url.trim_end_matches('/');
599
600        if url.contains("gitlab.com") || url.contains("gitlab.") {
601            let parts: Vec<&str> = url.split('/').collect();
602            if let Some(gitlab_pos) = parts.iter().position(|&x| x.contains("gitlab")) {
603                if gitlab_pos + 2 < parts.len() {
604                    let host = parts[gitlab_pos];
605                    let owner = parts[gitlab_pos + 1];
606                    let repo = parts[gitlab_pos + 2];
607                    return Some(format!(
608                        "https://{}/{}{}/-/archive/{}/{}-{}.tar.gz",
609                        host,
610                        owner,
611                        if parts.len() > gitlab_pos + 3 {
612                            format!("/{}", parts[gitlab_pos + 3..].join("/"))
613                        } else {
614                            String::new()
615                        },
616                        branch,
617                        repo,
618                        branch
619                    ));
620                }
621            }
622        }
623
624        None
625    }
626
627    async fn analyze_tarball_with_name(
628        &self,
629        download_url: &str,
630        original_url: &str,
631    ) -> Result<ProjectAnalysis> {
632        let project_name = self.extract_project_name_from_original(original_url);
633        let mut project_analysis = ProjectAnalysis::new(project_name);
634
635        let response = self
636            .client
637            .get(download_url)
638            .send()
639            .await
640            .map_err(|e| AnalysisError::network(format!("Failed to fetch URL: {}", e)))?;
641
642        if !response.status().is_success() {
643            return Err(AnalysisError::network(format!(
644                "HTTP request failed with status: {}",
645                response.status()
646            )));
647        }
648
649        let total_size = response.content_length();
650
651        #[cfg(feature = "cli")]
652        if let Some(pb) = &self.progress_bar {
653            if let Some(size) = total_size {
654                use indicatif::ProgressStyle;
655                pb.set_style(
656                    ProgressStyle::default_bar()
657                        .template("[{elapsed_precise}] [{wide_bar:.cyan/blue}] {decimal_bytes_per_sec} {binary_bytes}/{binary_total_bytes} ({eta}) {msg}")
658                        .unwrap_or_else(|_| ProgressStyle::default_bar())
659                        .progress_chars("#>-"),
660                );
661                pb.set_length(size);
662                pb.set_message("Downloading and processing...");
663            } else {
664                pb.set_message("Downloading and processing...");
665                pb.enable_steady_tick(std::time::Duration::from_millis(120));
666            }
667        }
668
669        #[cfg(target_arch = "wasm32")]
670        {
671            let bytes = response
672                .bytes()
673                .await
674                .map_err(|e| AnalysisError::network(format!("Failed to read response: {}", e)))?;
675
676            #[cfg(feature = "cli")]
677            if let Some(pb) = &self.progress_bar {
678                pb.set_message("Processing archive...");
679            }
680
681            self.process_tarball_bytes(&bytes, &mut project_analysis)
682                .await?;
683        }
684
685        #[cfg(not(target_arch = "wasm32"))]
686        {
687            let stream = response.bytes_stream();
688            let stream_reader = StreamReader::new(
689                stream,
690                #[cfg(feature = "cli")]
691                self.progress_bar.clone(),
692                total_size,
693            );
694
695            #[cfg(feature = "cli")]
696            if let Some(pb) = &self.progress_bar {
697                pb.set_message("Processing archive...");
698            }
699
700            self.process_tarball_stream(stream_reader, &mut project_analysis)
701                .await?;
702        }
703
704        Ok(project_analysis)
705    }
706
707    async fn process_tarball_stream(
708        &self,
709        stream_reader: StreamReader,
710        project_analysis: &mut ProjectAnalysis,
711    ) -> Result<()> {
712        #[cfg(not(target_arch = "wasm32"))]
713        {
714            let filter = self.filter.clone();
715            let metrics_result = task::spawn_blocking(move || {
716                let decoder = GzDecoder::new(stream_reader);
717                let mut archive = Archive::new(decoder);
718
719                let entries = archive.entries().map_err(|e| {
720                    AnalysisError::archive(format!("Failed to read tar entries: {}", e))
721                })?;
722
723                let mut collected_metrics = Vec::new();
724                let mut stats = FilterStats::new();
725
726                for entry in entries {
727                    let entry = entry.map_err(|e| {
728                        AnalysisError::archive(format!("Failed to read tar entry: {}", e))
729                    })?;
730
731                    if let Ok(metrics) = Self::process_tar_entry_sync(entry, &filter, &mut stats) {
732                        collected_metrics.push(metrics);
733                    }
734                }
735
736                #[cfg(feature = "cli")]
737                log::info!(
738                    "Filter stats: processed {}/{} files ({:.1}% filtered), saved {}",
739                    stats.processed,
740                    stats.total_entries,
741                    stats.filter_ratio() * 100.0,
742                    stats.format_bytes_saved()
743                );
744
745                Ok::<Vec<FileMetrics>, AnalysisError>(collected_metrics)
746            })
747            .await
748            .map_err(|e| AnalysisError::archive(format!("Task join error: {}", e)))??;
749
750            for metrics in metrics_result {
751                project_analysis.add_file_metrics(metrics)?;
752            }
753        }
754
755        #[cfg(target_arch = "wasm32")]
756        {
757            let decoder = GzDecoder::new(stream_reader);
758            let mut archive = Archive::new(decoder);
759
760            let entries = archive.entries().map_err(|e| {
761                AnalysisError::archive(format!("Failed to read tar entries: {}", e))
762            })?;
763
764            let mut stats = FilterStats::new();
765
766            for entry in entries {
767                let entry = entry.map_err(|e| {
768                    AnalysisError::archive(format!("Failed to read tar entry: {}", e))
769                })?;
770
771                if let Ok(metrics) = Self::process_tar_entry_sync(entry, &self.filter, &mut stats) {
772                    project_analysis.add_file_metrics(metrics)?;
773                }
774            }
775
776            web_sys::console::log_1(
777                &format!(
778                    "Filter stats: processed {}/{} files ({:.1}% filtered), saved {}",
779                    stats.processed,
780                    stats.total_entries,
781                    stats.filter_ratio() * 100.0,
782                    stats.format_bytes_saved()
783                )
784                .into(),
785            );
786        }
787
788        Ok(())
789    }
790
/// wasm32-only: decompresses and walks a gzip-compressed tar archive held
/// in `bytes`, adding metrics for every accepted file to `project_analysis`
/// and logging filter statistics to the browser console.
///
/// Entries for which `process_tar_entry_sync` returns an error are skipped.
///
/// # Errors
///
/// Returns an archive error if the entry list or an entry cannot be read,
/// and propagates errors from `add_file_metrics`.
#[cfg(target_arch = "wasm32")]
async fn process_tarball_bytes(
    &self,
    bytes: &bytes::Bytes,
    project_analysis: &mut ProjectAnalysis,
) -> Result<()> {
    let mut archive = Archive::new(GzDecoder::new(Cursor::new(bytes.as_ref())));

    let entries = archive
        .entries()
        .map_err(|e| AnalysisError::archive(format!("Failed to read tar entries: {}", e)))?;

    let mut stats = FilterStats::new();

    for entry in entries {
        let entry = entry
            .map_err(|e| AnalysisError::archive(format!("Failed to read tar entry: {}", e)))?;

        match Self::process_tar_entry_sync(entry, &self.filter, &mut stats) {
            Ok(metrics) => {
                project_analysis.add_file_metrics(metrics)?;
            }
            Err(_) => {}
        }
    }

    let summary = format!(
        "Filter stats: processed {}/{} files ({:.1}% filtered), saved {}",
        stats.processed,
        stats.total_entries,
        stats.filter_ratio() * 100.0,
        stats.format_bytes_saved()
    );
    web_sys::console::log_1(&summary.into());

    Ok(())
}
829
830    fn process_tar_entry_sync<R: Read>(
831        mut entry: tar::Entry<'_, R>,
832        filter: &IntelligentFilter,
833        stats: &mut FilterStats,
834    ) -> Result<FileMetrics> {
835        let header = entry.header();
836        let path = header
837            .path()
838            .map_err(|e| AnalysisError::archive(format!("Invalid path in tar entry: {}", e)))?;
839
840        let file_path = path.to_string_lossy().to_string();
841
842        if !header.entry_type().is_file() || header.size().unwrap_or(0) == 0 {
843            return Err(AnalysisError::archive("Not a file or empty".to_string()));
844        }
845
846        let file_size = header.size().unwrap_or(0);
847
848        let should_process = filter.should_process_file(&file_path, file_size);
849        stats.record_entry(file_size, !should_process);
850
851        if !should_process {
852            return Err(AnalysisError::archive("File filtered out".to_string()));
853        }
854
855        let language = LanguageRegistry::detect_by_path(&file_path)
856            .map(|l| l.name.clone())
857            .unwrap_or_else(|| "Text".to_string());
858
859        let mut content = String::new();
860        if entry.read_to_string(&mut content).is_err() {
861            return Err(AnalysisError::archive(
862                "Failed to read file content".to_string(),
863            ));
864        }
865
866        analyze_file_content(&file_path, &content, &language, file_size)
867    }
868
869    fn extract_project_name_from_original(&self, url: &str) -> String {
870        if url.starts_with("http://") || url.starts_with("https://") {
871            let url = url.trim_end_matches('/');
872
873            if url.contains("/tree/") {
874                let parts: Vec<&str> = url.split('/').collect();
875                if let Some(tree_pos) = parts.iter().position(|&x| x == "tree") {
876                    if tree_pos > 1 {
877                        let repo = parts[tree_pos - 1];
878                        let branch = parts.get(tree_pos + 1).unwrap_or(&"unknown");
879                        return format!("{}@{}", repo, branch);
880                    }
881                }
882            }
883
884            if url.contains("/commit/") {
885                let parts: Vec<&str> = url.split('/').collect();
886                if let Some(commit_pos) = parts.iter().position(|&x| x == "commit") {
887                    if commit_pos > 1 {
888                        let repo = parts[commit_pos - 1];
889                        let commit = parts.get(commit_pos + 1).unwrap_or(&"unknown");
890                        return format!("{}@{}", repo, &commit[..7.min(commit.len())]);
891                    }
892                }
893            }
894
895            let parts: Vec<&str> = url.split('/').collect();
896            if parts.len() >= 2 {
897                let repo = parts[parts.len() - 1];
898                return format!("{}@main", repo);
899            }
900        } else if url.contains('/') && !url.contains('.') {
901            let parts: Vec<&str> = url.split('@').collect();
902            let repo_part = parts[0];
903            let branch = parts.get(1).unwrap_or(&"main");
904
905            if let Some(repo_name) = repo_part.split('/').last() {
906                return format!("{}@{}", repo_name, branch);
907            }
908        }
909
910        "remote-project".to_string()
911    }
912
913    #[allow(dead_code)]
914    fn extract_project_name(&self, url: &str) -> String {
915        let url_path = url.trim_end_matches('/');
916
917        if let Some(filename) = url_path.split('/').last() {
918            if filename.ends_with(".tar.gz") {
919                return filename.trim_end_matches(".tar.gz").to_string();
920            }
921            if filename.ends_with(".tgz") {
922                return filename.trim_end_matches(".tgz").to_string();
923            }
924            return filename.to_string();
925        }
926
927        "remote-project".to_string()
928    }
929
930    fn format_bytes_simple(bytes: u64) -> String {
931        const UNITS: &[&str] = &["B", "KiB", "MiB", "GiB", "TiB"];
932        const THRESHOLD: f64 = 1024.0;
933
934        if bytes == 0 {
935            return "0 B".to_string();
936        }
937
938        let mut size = bytes as f64;
939        let mut unit_index = 0;
940
941        while size >= THRESHOLD && unit_index < UNITS.len() - 1 {
942            size /= THRESHOLD;
943            unit_index += 1;
944        }
945
946        if unit_index == 0 {
947            format!("{} {}", bytes, UNITS[unit_index])
948        } else {
949            format!("{:.1} {}", size, UNITS[unit_index])
950        }
951    }
952}
953
/// `RemoteAnalyzer::default()` is equivalent to `RemoteAnalyzer::new()`.
impl Default for RemoteAnalyzer {
    /// Builds an analyzer by delegating to [`RemoteAnalyzer::new`].
    fn default() -> Self {
        Self::new()
    }
}
959
960fn analyze_file_content(
961    file_path: &str,
962    content: &str,
963    language: &str,
964    file_size: u64,
965) -> Result<FileMetrics> {
966    let lines: Vec<&str> = content.lines().collect();
967    let total_lines = lines.len();
968
969    let mut code_lines = 0;
970    let mut comment_lines = 0;
971    let mut blank_lines = 0;
972
973    let lang_def = LanguageRegistry::get_language(language);
974    let empty_line_comments = vec![];
975    let empty_multi_line_comments = vec![];
976    let line_comments = lang_def
977        .map(|l| &l.line_comments)
978        .unwrap_or(&empty_line_comments);
979    let multi_line_comments = lang_def
980        .map(|l| &l.multi_line_comments)
981        .unwrap_or(&empty_multi_line_comments);
982
983    let mut in_multi_line_comment = false;
984
985    for line in lines {
986        let trimmed = line.trim();
987
988        if trimmed.is_empty() {
989            blank_lines += 1;
990            continue;
991        }
992
993        let mut is_comment = false;
994
995        if !in_multi_line_comment {
996            for comment_start in line_comments {
997                if trimmed.starts_with(comment_start) {
998                    is_comment = true;
999                    break;
1000                }
1001            }
1002
1003            for (start, end) in multi_line_comments {
1004                if trimmed.starts_with(start) {
1005                    is_comment = true;
1006                    if !trimmed.ends_with(end) {
1007                        in_multi_line_comment = true;
1008                    }
1009                    break;
1010                }
1011            }
1012        } else {
1013            is_comment = true;
1014            for (_, end) in multi_line_comments {
1015                if trimmed.ends_with(end) {
1016                    in_multi_line_comment = false;
1017                    break;
1018                }
1019            }
1020        }
1021
1022        if is_comment {
1023            comment_lines += 1;
1024        } else {
1025            code_lines += 1;
1026        }
1027    }
1028
1029    let metrics = FileMetrics::new(
1030        file_path,
1031        language.to_string(),
1032        total_lines,
1033        code_lines,
1034        comment_lines,
1035        blank_lines,
1036    )?
1037    .with_size_bytes(file_size);
1038
1039    Ok(metrics)
1040}
1041
#[cfg(test)]
mod tests {
    use super::*;

    /// GitHub repository and commit URLs map to the matching archive URL.
    #[test]
    fn test_github_url_parsing() {
        let analyzer = RemoteAnalyzer::new();
        let cases = [
            (
                "https://github.com/user/repo",
                "https://github.com/user/repo/archive/refs/heads/main.tar.gz",
            ),
            (
                "https://github.com/user/repo/commit/abc123",
                "https://github.com/user/repo/archive/abc123.tar.gz",
            ),
        ];

        for &(input, expected) in &cases {
            assert_eq!(
                analyzer.parse_github_url(input, "main"),
                Some(expected.to_string())
            );
        }
    }

    /// Bitbucket repository URLs map to the `/get/<branch>.tar.gz` archive.
    #[test]
    fn test_bitbucket_url_parsing() {
        let analyzer = RemoteAnalyzer::new();
        let expected = "https://bitbucket.org/user/repo/get/main.tar.gz".to_string();

        assert_eq!(
            analyzer.parse_bitbucket_url("https://bitbucket.org/user/repo", "main"),
            Some(expected)
        );
    }

    /// Codeberg repository URLs map to the `/archive/<branch>.tar.gz` archive.
    #[test]
    fn test_codeberg_url_parsing() {
        let analyzer = RemoteAnalyzer::new();
        let expected = "https://codeberg.org/user/repo/archive/main.tar.gz".to_string();

        assert_eq!(
            analyzer.parse_codeberg_url("https://codeberg.org/user/repo", "main"),
            Some(expected)
        );
    }

    /// The project name is the archive file name without its extension.
    #[test]
    fn test_extract_project_name() {
        let analyzer = RemoteAnalyzer::new();
        let cases = [
            ("https://example.com/project.tar.gz", "project"),
            ("https://github.com/user/repo/archive/main.tar.gz", "main"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(analyzer.extract_project_name(input), expected);
        }
    }
}
1096
1097use tokio::sync::mpsc;
1098
/// Blocking `Read` view over a stream of downloaded byte chunks.
///
/// Chunks arrive over a channel from a background task started in
/// `StreamReader::new` and are handed out through the `Read` implementation.
struct StreamReader {
    /// Receiving end of the channel that delivers chunks or an I/O error.
    receiver: mpsc::Receiver<std::io::Result<bytes::Bytes>>,
    /// Chunk currently being drained by `read`, if any.
    current_chunk: Option<Cursor<bytes::Bytes>>,
    /// Set once the channel has closed or yielded an error; after that,
    /// `read` reports end of stream.
    finished: bool,
}
1104
impl StreamReader {
    /// Creates a reader fed by `stream` (native targets).
    ///
    /// A task is spawned with `tokio::spawn` that pulls chunks from `stream`
    /// and forwards them over a bounded channel of capacity 32. With the
    /// `cli` feature enabled, the optional progress bar is updated after
    /// every chunk: its position when `total_size` is known, otherwise a
    /// "Downloaded ..." message with the running byte count.
    ///
    /// The task stops when the stream ends, when the receiving side has been
    /// dropped, or after forwarding the first stream error (converted to a
    /// `std::io::Error`).
    #[cfg(not(target_arch = "wasm32"))]
    fn new(
        stream: impl futures_util::Stream<Item = reqwest::Result<bytes::Bytes>> + Send + 'static,
        #[cfg(feature = "cli")] progress_bar: Option<ProgressBar>,
        total_size: Option<u64>,
    ) -> Self {
        // Bounded channel: the producer waits once 32 chunks are queued.
        let (tx, rx) = mpsc::channel(32);

        tokio::spawn(async move {
            // Running total of bytes received so far (used for progress).
            let mut downloaded = 0u64;
            // Pin the stream so `next()` can be called on it.
            let mut stream = Box::pin(stream);

            while let Some(chunk_result) = stream.next().await {
                match chunk_result {
                    Ok(chunk) => {
                        downloaded += chunk.len() as u64;

                        #[cfg(feature = "cli")]
                        if let Some(pb) = &progress_bar {
                            // Known total: advance the bar; unknown total:
                            // show a textual byte counter instead.
                            if let Some(_total) = total_size {
                                pb.set_position(downloaded);
                            } else {
                                let formatted = RemoteAnalyzer::format_bytes_simple(downloaded);
                                pb.set_message(format!("Downloaded {}...", formatted));
                            }
                        }

                        // A send error means the reader was dropped; stop downloading.
                        if tx.send(Ok(chunk)).await.is_err() {
                            break;
                        }
                    }
                    Err(e) => {
                        // Forward the failure to the reader (best effort) and stop.
                        let _ = tx
                            .send(Err(std::io::Error::new(
                                std::io::ErrorKind::Other,
                                format!("Stream error: {}", e),
                            )))
                            .await;
                        break;
                    }
                }
            }
        });

        Self {
            receiver: rx,
            current_chunk: None,
            finished: false,
        }
    }

    /// Creates a reader fed by `stream` (WASM target).
    ///
    /// Same forwarding logic as the native constructor, but the task is
    /// started with `wasm_bindgen_futures::spawn_local` and the stream is not
    /// required to be `Send`. The progress parameters are only read when the
    /// `cli` feature is enabled.
    #[cfg(target_arch = "wasm32")]
    fn new(
        stream: impl futures_util::Stream<Item = reqwest::Result<bytes::Bytes>> + 'static,
        #[cfg(feature = "cli")] _progress_bar: Option<ProgressBar>,
        _total_size: Option<u64>,
    ) -> Self {
        // Bounded channel: the producer waits once 32 chunks are queued.
        let (tx, rx) = mpsc::channel(32);

        wasm_bindgen_futures::spawn_local(async move {
            // Running total of bytes received so far (used for progress).
            let mut downloaded = 0u64;
            // Pin the stream so `next()` can be called on it.
            let mut stream = Box::pin(stream);

            while let Some(chunk_result) = stream.next().await {
                match chunk_result {
                    Ok(chunk) => {
                        downloaded += chunk.len() as u64;

                        #[cfg(feature = "cli")]
                        if let Some(pb) = &_progress_bar {
                            // Known total: advance the bar; unknown total:
                            // show a textual byte counter instead.
                            if let Some(_total) = _total_size {
                                pb.set_position(downloaded);
                            } else {
                                let formatted = RemoteAnalyzer::format_bytes_simple(downloaded);
                                pb.set_message(format!("Downloaded {}...", formatted));
                            }
                        }

                        // A send error means the reader was dropped; stop downloading.
                        if tx.send(Ok(chunk)).await.is_err() {
                            break;
                        }
                    }
                    Err(e) => {
                        // Forward the failure to the reader (best effort) and stop.
                        let _ = tx
                            .send(Err(std::io::Error::new(
                                std::io::ErrorKind::Other,
                                format!("Stream error: {}", e),
                            )))
                            .await;
                        break;
                    }
                }
            }
        });

        Self {
            receiver: rx,
            current_chunk: None,
            finished: false,
        }
    }
}
1208
1209impl Read for StreamReader {
1210    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
1211        if let Some(ref mut cursor) = self.current_chunk {
1212            let read = cursor.read(buf)?;
1213            if read > 0 {
1214                return Ok(read);
1215            }
1216            self.current_chunk = None;
1217        }
1218
1219        if self.finished {
1220            return Ok(0);
1221        }
1222
1223        match self.receiver.try_recv() {
1224            Ok(Ok(chunk)) => {
1225                self.current_chunk = Some(Cursor::new(chunk));
1226                if let Some(ref mut cursor) = self.current_chunk {
1227                    cursor.read(buf)
1228                } else {
1229                    Ok(0)
1230                }
1231            }
1232            Ok(Err(e)) => {
1233                self.finished = true;
1234                Err(e)
1235            }
1236            Err(mpsc::error::TryRecvError::Empty) => {
1237                #[cfg(not(target_arch = "wasm32"))]
1238                {
1239                    match self.receiver.blocking_recv() {
1240                        Some(Ok(chunk)) => {
1241                            self.current_chunk = Some(Cursor::new(chunk));
1242                            if let Some(ref mut cursor) = self.current_chunk {
1243                                cursor.read(buf)
1244                            } else {
1245                                Ok(0)
1246                            }
1247                        }
1248                        Some(Err(e)) => {
1249                            self.finished = true;
1250                            Err(e)
1251                        }
1252                        None => {
1253                            self.finished = true;
1254                            Ok(0)
1255                        }
1256                    }
1257                }
1258                #[cfg(target_arch = "wasm32")]
1259                {
1260                    Err(std::io::Error::new(
1261                        std::io::ErrorKind::WouldBlock,
1262                        "Would block in WASM",
1263                    ))
1264                }
1265            }
1266            Err(mpsc::error::TryRecvError::Disconnected) => {
1267                self.finished = true;
1268                Ok(0)
1269            }
1270        }
1271    }
1272}