1use crate::askalono::{ScanStrategy, TextData};
2use crate::models::{FileInfo, FileInfoBuilder, FileType, LicenseDetection, Match};
3use crate::parsers::{CargoParser, MavenParser, NpmParser, PackageParser, PythonParser};
4use crate::scanner::ProcessResult;
5use crate::utils::file::{get_creation_date, is_path_excluded};
6use crate::utils::hash::{calculate_md5, calculate_sha1, calculate_sha256};
7use crate::utils::language::detect_language;
8use anyhow::Error;
9use content_inspector::{ContentType, inspect};
10use glob::Pattern;
11use indicatif::ProgressBar;
12use mime_guess::from_path;
13use rayon::prelude::*;
14use std::fs::{self};
15use std::path::Path;
16use std::sync::Arc;
17
18pub fn process<P: AsRef<Path>>(
21 path: P,
22 max_depth: usize,
23 progress_bar: Arc<ProgressBar>,
24 exclude_patterns: &[Pattern],
25 scan_strategy: &ScanStrategy,
26) -> Result<ProcessResult, Error> {
27 let path = path.as_ref();
28
29 if is_path_excluded(path, exclude_patterns) {
30 return Ok(ProcessResult {
31 files: Vec::new(),
32 excluded_count: 1,
33 });
34 }
35
36 let mut all_files = Vec::new();
37 let mut total_excluded = 0;
38
39 let entries: Vec<_> = fs::read_dir(path)?.filter_map(Result::ok).collect();
41
42 let mut file_entries = Vec::new();
43 let mut dir_entries = Vec::new();
44
45 for entry in entries {
46 let path = entry.path();
47
48 if is_path_excluded(&path, exclude_patterns) {
50 total_excluded += 1;
51 continue;
52 }
53
54 match fs::metadata(&path) {
55 Ok(metadata) if metadata.is_file() => file_entries.push((path, metadata)),
56 Ok(metadata) if path.is_dir() => dir_entries.push((path, metadata)),
57 _ => continue,
58 }
59 }
60
61 all_files.append(
63 &mut file_entries
64 .par_iter()
65 .map(|(path, metadata)| {
66 let file_entry = process_file(path, metadata, scan_strategy);
67 progress_bar.inc(1);
68 file_entry
69 })
70 .collect(),
71 );
72
73 for (path, metadata) in dir_entries {
75 all_files.push(process_directory(&path, &metadata));
76
77 if max_depth > 0 {
78 match process(
79 &path,
80 max_depth - 1,
81 progress_bar.clone(),
82 exclude_patterns,
83 scan_strategy,
84 ) {
85 Ok(mut result) => {
86 all_files.append(&mut result.files);
87 total_excluded += result.excluded_count;
88 }
89 Err(e) => eprintln!("Error processing directory {}: {}", path.display(), e),
90 }
91 }
92 }
93
94 Ok(ProcessResult {
95 files: all_files,
96 excluded_count: total_excluded,
97 })
98}
99
100fn process_file(path: &Path, metadata: &fs::Metadata, scan_strategy: &ScanStrategy) -> FileInfo {
101 let mut scan_errors: Vec<String> = vec![];
102 let mut file_info_builder = FileInfoBuilder::default();
103
104 if let Err(e) = extract_information_from_content(&mut file_info_builder, path, scan_strategy) {
105 scan_errors.push(e.to_string());
106 };
107
108 file_info_builder
109 .name(path.file_name().unwrap().to_string_lossy().to_string())
110 .base_name(
111 path.file_stem()
112 .unwrap_or_default()
113 .to_string_lossy()
114 .to_string(),
115 )
116 .extension(
117 path.extension()
118 .map_or("".to_string(), |ext| format!(".{}", ext.to_string_lossy())),
119 )
120 .path(path.to_string_lossy().to_string())
121 .file_type(FileType::File)
122 .mime_type(Some(
123 from_path(path)
124 .first_or_octet_stream()
125 .essence_str()
126 .to_string(),
127 ))
128 .size(metadata.len())
129 .date(get_creation_date(metadata))
130 .scan_errors(scan_errors)
131 .build()
132 .expect("FileInformationBuild not completely initialized")
133}
134
135fn extract_information_from_content(
136 file_info_builder: &mut FileInfoBuilder,
137 path: &Path,
138 scan_strategy: &ScanStrategy,
139) -> Result<(), Error> {
140 let buffer = fs::read(path)?;
141
142 file_info_builder
143 .sha1(Some(calculate_sha1(&buffer)))
144 .md5(Some(calculate_md5(&buffer)))
145 .sha256(Some(calculate_sha256(&buffer)))
146 .programming_language(Some(detect_language(path, &buffer)));
147
148 if NpmParser::is_match(path) {
149 let package_data = vec![NpmParser::extract_package_data(path)];
150 file_info_builder.package_data(package_data);
151 Ok(())
152 } else if CargoParser::is_match(path) {
153 let package_data = vec![CargoParser::extract_package_data(path)];
154 file_info_builder.package_data(package_data);
155 Ok(())
156 } else if PythonParser::is_match(path) {
157 let package_data = vec![PythonParser::extract_package_data(path)];
158 file_info_builder.package_data(package_data);
159 Ok(())
160 } else if MavenParser::is_match(path) {
161 let package_data = vec![MavenParser::extract_package_data(path)];
162 file_info_builder.package_data(package_data);
163 Ok(())
164 } else if inspect(&buffer) == ContentType::UTF_8 {
165 extract_license_information(
166 file_info_builder,
167 String::from_utf8_lossy(&buffer).into_owned(),
168 scan_strategy,
169 )
170 } else {
171 Ok(())
172 }
173}
174
175fn extract_license_information(
176 file_info_builder: &mut FileInfoBuilder,
177 text_content: String,
178 scan_strategy: &ScanStrategy,
179) -> Result<(), Error> {
180 if text_content.is_empty() {
182 return Ok(());
183 }
184
185 let license_result = scan_strategy.scan(&TextData::from(text_content.as_str()))?;
186 let license_expr = license_result.license.map(|x| x.name.to_string());
187
188 let license_detections = license_result
189 .containing
190 .iter()
191 .map(|detection| LicenseDetection {
192 license_expression: detection.license.name.to_string(),
193 matches: vec![Match {
194 score: detection.score as f64,
195 start_line: detection.line_range.0,
196 end_line: detection.line_range.1,
197 license_expression: detection.license.name.to_string(),
198 matched_text: None, rule_identifier: None,
200 }],
201 })
202 .collect::<Vec<_>>();
203
204 file_info_builder
205 .license_expression(license_expr)
206 .license_detections(license_detections);
207
208 Ok(())
209}
210
211fn process_directory(path: &Path, metadata: &fs::Metadata) -> FileInfo {
212 let name = path
213 .file_name()
214 .unwrap_or_default()
215 .to_string_lossy()
216 .to_string();
217 let base_name = name.clone(); FileInfo {
220 name,
221 base_name,
222 extension: "".to_string(),
223 path: path.to_string_lossy().to_string(),
224 file_type: FileType::Directory,
225 mime_type: None,
226 size: 0,
227 date: get_creation_date(metadata),
228 sha1: None,
229 md5: None,
230 sha256: None,
231 programming_language: None,
232 package_data: Vec::new(), license_expression: None,
234 copyrights: Vec::new(), license_detections: Vec::new(), urls: Vec::new(), scan_errors: Vec::new(),
238 }
239}