1use crate::askalono::{ScanStrategy, TextData};
2use crate::models::{FileInfo, FileInfoBuilder, FileType, LicenseDetection, Match};
3use crate::parsers::{CargoParser, NpmParser, PythonParser, PackageParser};
4use crate::scanner::ProcessResult;
5use crate::utils::file::{get_creation_date, is_path_excluded};
6use crate::utils::hash::{calculate_md5, calculate_sha1, calculate_sha256};
7use crate::utils::language::detect_language;
8use anyhow::Error;
9use content_inspector::{ContentType, inspect};
10use glob::Pattern;
11use indicatif::ProgressBar;
12use mime_guess::from_path;
13use rayon::prelude::*;
14use std::fs::{self};
15use std::path::Path;
16use std::sync::Arc;
17
18pub fn process<P: AsRef<Path>>(
21 path: P,
22 max_depth: usize,
23 progress_bar: Arc<ProgressBar>,
24 exclude_patterns: &[Pattern],
25 scan_strategy: &ScanStrategy,
26) -> Result<ProcessResult, Error> {
27 let path = path.as_ref();
28
29 if is_path_excluded(path, exclude_patterns) {
30 return Ok(ProcessResult {
31 files: Vec::new(),
32 excluded_count: 1,
33 });
34 }
35
36 let mut all_files = Vec::new();
37 let mut total_excluded = 0;
38
39 let entries: Vec<_> = fs::read_dir(path)?.filter_map(Result::ok).collect();
41
42 let mut file_entries = Vec::new();
43 let mut dir_entries = Vec::new();
44
45 for entry in entries {
46 let path = entry.path();
47
48 if is_path_excluded(&path, exclude_patterns) {
50 total_excluded += 1;
51 continue;
52 }
53
54 match fs::metadata(&path) {
55 Ok(metadata) if metadata.is_file() => file_entries.push((path, metadata)),
56 Ok(metadata) if path.is_dir() => dir_entries.push((path, metadata)),
57 _ => continue,
58 }
59 }
60
61 all_files.append(
63 &mut file_entries
64 .par_iter()
65 .map(|(path, metadata)| {
66 let file_entry = process_file(path, metadata, scan_strategy);
67 progress_bar.inc(1);
68 file_entry
69 })
70 .collect(),
71 );
72
73 for (path, metadata) in dir_entries {
75 all_files.push(process_directory(&path, &metadata));
76
77 if max_depth > 0 {
78 match process(
79 &path,
80 max_depth - 1,
81 progress_bar.clone(),
82 exclude_patterns,
83 scan_strategy,
84 ) {
85 Ok(mut result) => {
86 all_files.append(&mut result.files);
87 total_excluded += result.excluded_count;
88 }
89 Err(e) => eprintln!("Error processing directory {}: {}", path.display(), e),
90 }
91 }
92 }
93
94 Ok(ProcessResult {
95 files: all_files,
96 excluded_count: total_excluded,
97 })
98}
99
100fn process_file(path: &Path, metadata: &fs::Metadata, scan_strategy: &ScanStrategy) -> FileInfo {
101 let mut scan_errors: Vec<String> = vec![];
102 let mut file_info_builder = FileInfoBuilder::default();
103
104 if let Err(e) = extract_information_from_content(&mut file_info_builder, path, scan_strategy) {
105 scan_errors.push(e.to_string());
106 };
107
108 return file_info_builder
109 .name(path.file_name().unwrap().to_string_lossy().to_string())
110 .base_name(
111 path.file_stem()
112 .unwrap_or_default()
113 .to_string_lossy()
114 .to_string(),
115 )
116 .extension(
117 path.extension()
118 .map_or("".to_string(), |ext| format!(".{}", ext.to_string_lossy())),
119 )
120 .path(path.to_string_lossy().to_string())
121 .file_type(FileType::File)
122 .mime_type(Some(
123 from_path(path)
124 .first_or_octet_stream()
125 .essence_str()
126 .to_string(),
127 ))
128 .size(metadata.len())
129 .date(get_creation_date(metadata))
130 .scan_errors(scan_errors)
131 .build()
132 .expect("FileInformationBuild not completely initialized");
133}
134
135fn extract_information_from_content(
136 file_info_builder: &mut FileInfoBuilder,
137 path: &Path,
138 scan_strategy: &ScanStrategy,
139) -> Result<(), Error> {
140 let buffer = fs::read(path)?;
141
142 file_info_builder
143 .sha1(Some(calculate_sha1(&buffer)))
144 .md5(Some(calculate_md5(&buffer)))
145 .sha256(Some(calculate_sha256(&buffer)))
146 .programming_language(Some(detect_language(path, &buffer)));
147
148 if NpmParser::is_match(path) {
149 let package_data = vec![NpmParser::extract_package_data(path)];
150 file_info_builder.package_data(package_data);
151 Ok(())
152 } else if CargoParser::is_match(path) {
153 let package_data = vec![CargoParser::extract_package_data(path)];
154 file_info_builder.package_data(package_data);
155 Ok(())
156 } else if PythonParser::is_match(path) {
157 let package_data = vec![PythonParser::extract_package_data(path)];
158 file_info_builder.package_data(package_data);
159 Ok(())
160 } else if inspect(&buffer) == ContentType::UTF_8 {
161 extract_license_information(
162 file_info_builder,
163 String::from_utf8_lossy(&buffer).into_owned(),
164 scan_strategy,
165 )
166 } else {
167 Ok(())
168 }
169}
170
171fn extract_license_information(
172 file_info_builder: &mut FileInfoBuilder,
173 text_content: String,
174 scan_strategy: &ScanStrategy,
175) -> Result<(), Error> {
176 if text_content.is_empty() {
178 return Ok(());
179 }
180
181 let license_result = scan_strategy.scan(&TextData::from(text_content.as_str()))?;
182 let license_expr = license_result
183 .license
184 .and_then(|x| Some(x.name.to_string()));
185
186 let license_detections = license_result
187 .containing
188 .iter()
189 .map(|detection| LicenseDetection {
190 license_expression: detection.license.name.to_string(),
191 matches: vec![Match {
192 score: detection.score as f64,
193 start_line: detection.line_range.0,
194 end_line: detection.line_range.1,
195 license_expression: detection.license.name.to_string(),
196 matched_text: None, rule_identifier: None,
198 }],
199 })
200 .collect::<Vec<_>>();
201
202 file_info_builder
203 .license_expression(license_expr)
204 .license_detections(license_detections);
205
206 Ok(())
207}
208
209fn process_directory(path: &Path, metadata: &fs::Metadata) -> FileInfo {
210 let name = path
211 .file_name()
212 .unwrap_or_default()
213 .to_string_lossy()
214 .to_string();
215 let base_name = name.clone(); FileInfo {
218 name,
219 base_name,
220 extension: "".to_string(),
221 path: path.to_string_lossy().to_string(),
222 file_type: FileType::Directory,
223 mime_type: None,
224 size: 0,
225 date: get_creation_date(metadata),
226 sha1: None,
227 md5: None,
228 sha256: None,
229 programming_language: None,
230 package_data: Vec::new(), license_expression: None,
232 copyrights: Vec::new(), license_detections: Vec::new(), urls: Vec::new(), scan_errors: Vec::new(),
236 }
237}