1use crate::models::{Dependency, LicenseDetection, Match, PackageData, Party};
2use log::warn;
3use packageurl::PackageUrl;
4use std::fs::File;
5use std::io::Read;
6use std::path::Path;
7use toml::Value as TomlValue;
8use toml::map::Map as TomlMap;
9
10use super::PackageParser;
11
// Keys looked up while reading `pyproject.toml`.

// Top-level table that holds the project metadata.
const FIELD_PROJECT: &str = "project";
// Scalar metadata keys read from the project table.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
// Arrays whose entries are scanned for contact e-mail addresses.
const FIELD_AUTHORS: &str = "authors";
const FIELD_MAINTAINERS: &str = "maintainers";
// Table of project links; `homepage` and `repository` are read from it first
// and from the project table itself as a fallback.
const FIELD_URLS: &str = "urls";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
// Required dependencies (array or table) and optional dependency groups (table).
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
24
25pub struct PythonParser;
26
27impl PackageParser for PythonParser {
28 const PACKAGE_TYPE: &'static str = "pypi";
29
30 fn extract_package_data(path: &Path) -> PackageData {
31 if path.file_name().unwrap_or_default() == "pyproject.toml" {
32 extract_from_pyproject_toml(path)
33 } else if path.file_name().unwrap_or_default() == "setup.py" {
34 extract_from_setup_py(path)
35 } else {
36 default_package_data()
37 }
38 }
39
40 fn is_match(path: &Path) -> bool {
41 if let Some(filename) = path.file_name() {
42 filename == "pyproject.toml" || filename == "setup.py"
43 } else {
44 false
45 }
46 }
47}
48
49fn extract_from_pyproject_toml(path: &Path) -> PackageData {
50 let toml_content = match read_toml_file(path) {
51 Ok(content) => content,
52 Err(e) => {
53 warn!(
54 "Failed to read or parse pyproject.toml at {:?}: {}",
55 path, e
56 );
57 return default_package_data();
58 }
59 };
60
61 let project_table =
63 if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
64 project.clone()
66 } else if toml_content.get(FIELD_NAME).is_some() {
67 match toml_content.as_table() {
69 Some(table) => table.clone(),
70 None => {
71 warn!("Failed to convert TOML content to table in {:?}", path);
72 return default_package_data();
73 }
74 }
75 } else {
76 warn!("No project data found in pyproject.toml at {:?}", path);
77 return default_package_data();
78 };
79
80 let name = project_table
81 .get(FIELD_NAME)
82 .and_then(|v| v.as_str())
83 .map(String::from);
84
85 let version = project_table
86 .get(FIELD_VERSION)
87 .and_then(|v| v.as_str())
88 .map(String::from);
89
90 let license_detections = extract_license_info(&project_table);
91
92 let (homepage_url, repository_url) = extract_urls(&project_table);
94
95 let (dependencies, optional_dependencies) = extract_dependencies(&project_table);
96
97 let purl = name.as_ref().map(|n| {
99 let mut package_url =
100 PackageUrl::new(PythonParser::PACKAGE_TYPE, n).expect("Failed to create PackageUrl");
101
102 if let Some(v) = &version {
103 package_url.with_version(v).expect("Failed to set version");
104 }
105
106 package_url.to_string()
107 });
108
109 PackageData {
110 package_type: Some(PythonParser::PACKAGE_TYPE.to_string()),
111 namespace: None, name,
113 version,
114 homepage_url,
115 download_url: repository_url,
116 copyright: None,
117 license_detections,
118 dependencies: [dependencies, optional_dependencies].concat(),
119 parties: extract_parties(&project_table),
120 purl,
121 }
122}
123
124fn extract_license_info(project: &TomlMap<String, TomlValue>) -> Vec<LicenseDetection> {
125 let mut detections = Vec::new();
126
127 if let Some(license_value) = project.get(FIELD_LICENSE) {
129 match license_value {
130 TomlValue::String(license_str) => {
131 detections.push(create_license_detection(license_str));
132 }
133 TomlValue::Table(license_table) => {
134 if let Some(text) = license_table.get("text").and_then(|v| v.as_str()) {
135 detections.push(create_license_detection(text));
136 }
137 if let Some(expr) = license_table.get("expression").and_then(|v| v.as_str()) {
138 detections.push(create_license_detection(expr));
139 }
140 }
141 _ => {}
142 }
143 }
144
145 detections
146}
147
148fn create_license_detection(license_str: &str) -> LicenseDetection {
149 LicenseDetection {
150 license_expression: license_str.to_string(),
151 matches: vec![Match {
152 score: 100.0,
153 start_line: 0, end_line: 0,
155 license_expression: license_str.to_string(),
156 rule_identifier: None,
157 matched_text: None,
158 }],
159 }
160}
161
162fn extract_urls(project: &TomlMap<String, TomlValue>) -> (Option<String>, Option<String>) {
163 let mut homepage_url = None;
164 let mut repository_url = None;
165
166 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
168 homepage_url = urls
169 .get(FIELD_HOMEPAGE)
170 .and_then(|v| v.as_str())
171 .map(String::from);
172 repository_url = urls
173 .get(FIELD_REPOSITORY)
174 .and_then(|v| v.as_str())
175 .map(String::from);
176 }
177
178 if homepage_url.is_none() {
180 homepage_url = project
181 .get(FIELD_HOMEPAGE)
182 .and_then(|v| v.as_str())
183 .map(String::from);
184 }
185
186 if repository_url.is_none() {
187 repository_url = project
188 .get(FIELD_REPOSITORY)
189 .and_then(|v| v.as_str())
190 .map(String::from);
191 }
192
193 (homepage_url, repository_url)
194}
195
196fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
197 let mut parties = Vec::new();
198
199 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
201 for author in authors {
202 if let Some(author_str) = author.as_str()
203 && let Some(email) = extract_email_from_author_string(author_str)
204 {
205 parties.push(Party { email })
206 }
207 }
208 }
209
210 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
212 for maintainer in maintainers {
213 if let Some(maintainer_str) = maintainer.as_str()
214 && let Some(email) = extract_email_from_author_string(maintainer_str)
215 {
216 parties.push(Party { email })
217 }
218 }
219 }
220
221 parties
222}
223
/// Extracts the text between the first `<` and the next `>` that follows it.
///
/// Intended for author strings such as `"Jane Doe <jane@example.com>"`.
/// Returns `None` when there is no `<`, or no `>` after it. The closing
/// bracket is searched for only after the opening one, so a stray `>` earlier
/// in the string does not hide a well-formed `<email>` section.
fn extract_email_from_author_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
235
236fn extract_dependencies(
237 project: &TomlMap<String, TomlValue>,
238) -> (Vec<Dependency>, Vec<Dependency>) {
239 let mut dependencies = Vec::new();
240 let mut optional_dependencies = Vec::new();
241
242 if let Some(deps_value) = project.get(FIELD_DEPENDENCIES) {
244 match deps_value {
245 TomlValue::Array(arr) => {
246 dependencies = parse_dependency_array(arr, false);
247 }
248 TomlValue::Table(table) => {
249 dependencies = parse_dependency_table(table, false);
250 }
251 _ => {}
252 }
253 }
254
255 if let Some(opt_deps_table) = project
257 .get(FIELD_OPTIONAL_DEPENDENCIES)
258 .and_then(|v| v.as_table())
259 {
260 for (_feature, deps) in opt_deps_table {
261 match deps {
262 TomlValue::Array(arr) => {
263 optional_dependencies.extend(parse_dependency_array(arr, true));
264 }
265 TomlValue::Table(table) => {
266 optional_dependencies.extend(parse_dependency_table(table, true));
267 }
268 _ => {}
269 }
270 }
271 }
272
273 (dependencies, optional_dependencies)
274}
275
276fn parse_dependency_table(
277 table: &TomlMap<String, TomlValue>,
278 is_optional: bool,
279) -> Vec<Dependency> {
280 table
281 .iter()
282 .filter_map(|(name, version)| {
283 let version_str = version.as_str().map(|s| s.to_string());
285 let mut package_url = PackageUrl::new(PythonParser::PACKAGE_TYPE, name).ok()?;
287
288 if let Some(v) = &version_str {
290 package_url.with_version(v).ok()?;
291 }
292
293 Some(Dependency {
294 purl: Some(package_url.to_string()),
295 scope: None,
296 is_optional,
297 })
298 })
299 .collect()
300}
301
302fn parse_dependency_array(array: &[TomlValue], is_optional: bool) -> Vec<Dependency> {
303 array
304 .iter()
305 .filter_map(|dep| {
306 let dep_str = dep.as_str()?;
307
308 let mut parts = dep_str.split(['>', '=', '<', '~']);
311 let name = parts.next()?.trim().to_string();
312
313 let version = parts.next().map(|v| v.trim().to_string());
315
316 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE, &name) {
317 Ok(purl) => purl,
318 Err(_) => return None,
319 };
320
321 if let Some(ref v) = version {
322 package_url.with_version(v).ok()?;
323 }
324
325 Some(Dependency {
326 purl: Some(package_url.to_string()),
327 scope: None,
328 is_optional,
329 })
330 })
331 .collect()
332}
333
334fn extract_from_setup_py(path: &Path) -> PackageData {
335 let content = match read_file_to_string(path) {
339 Ok(content) => content,
340 Err(e) => {
341 warn!("Failed to read setup.py at {:?}: {}", path, e);
342 return default_package_data();
343 }
344 };
345
346 let name = extract_setup_value(&content, "name");
347 let version = extract_setup_value(&content, "version");
348 let license_expression = extract_setup_value(&content, "license");
349
350 let license_detections = license_expression.as_ref().map_or(Vec::new(), |license| {
352 vec![LicenseDetection {
353 license_expression: license.clone(),
354 matches: vec![Match {
355 score: 100.0,
356 start_line: 0, end_line: 0,
358 license_expression: license.clone(),
359 rule_identifier: None,
360 matched_text: None,
361 }],
362 }]
363 });
364
365 let purl = name.as_ref().map(|n| {
367 let mut package_url =
368 PackageUrl::new(PythonParser::PACKAGE_TYPE, n).expect("Failed to create PackageUrl");
369
370 if let Some(v) = &version {
371 package_url.with_version(v).expect("Failed to set version");
372 }
373
374 package_url.to_string()
375 });
376
377 PackageData {
378 package_type: Some(PythonParser::PACKAGE_TYPE.to_string()),
379 namespace: None,
380 name,
381 version,
382 homepage_url: extract_setup_value(&content, "url"),
383 download_url: None,
384 copyright: None,
385 license_detections,
386 dependencies: Vec::new(), parties: Vec::new(), purl,
389 }
390}
391
/// Finds a `key = "value"` (or `key = 'value'`) assignment in `content` and
/// returns the quoted value.
///
/// Double-quoted forms are tried before single-quoted ones, and for each quote
/// the spacings `key=`, `key =`, `key= ` and `key = ` are tried in that order.
///
/// * The key must not be the tail of a longer identifier, so looking up `url`
///   does not match `download_url="..."`.
/// * The value ends at the next occurrence of the *same* quote character that
///   opened it, so `"it's"` is returned whole.
///
/// Returns `None` when no assignment with a closing quote is found.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    const SPACINGS: [(&str, &str); 4] = [("", ""), (" ", ""), ("", " "), (" ", " ")];

    // True when the character before `idx` cannot be part of an identifier.
    let at_word_boundary = |idx: usize| {
        !content[..idx]
            .chars()
            .next_back()
            .map_or(false, |c| c.is_alphanumeric() || c == '_')
    };

    for &quote in &['"', '\''] {
        for &(before, after) in &SPACINGS {
            let pattern = format!("{}{}={}{}", key, before, after, quote);

            let value = content
                .match_indices(pattern.as_str())
                .filter(|(idx, _)| at_word_boundary(*idx))
                .find_map(|(idx, matched)| {
                    let rest = &content[idx + matched.len()..];
                    rest.find(quote).map(|end| rest[..end].to_string())
                });

            if value.is_some() {
                return value;
            }
        }
    }

    None
}
420
421fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
423 let content = read_file_to_string(path)?;
424 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
425}
426
/// Reads the whole file at `path` into a `String`.
///
/// # Errors
///
/// Returns `"Failed to open file: ..."` when the file cannot be opened and
/// `"Error reading file: ..."` when reading it fails (for example because the
/// contents are not valid UTF-8).
fn read_file_to_string(path: &Path) -> Result<String, String> {
    let mut handle = match File::open(path) {
        Ok(handle) => handle,
        Err(err) => return Err(format!("Failed to open file: {}", err)),
    };

    let mut buffer = String::new();
    match handle.read_to_string(&mut buffer) {
        Ok(_) => Ok(buffer),
        Err(err) => Err(format!("Error reading file: {}", err)),
    }
}
434
435fn default_package_data() -> PackageData {
436 PackageData {
437 package_type: None,
438 namespace: None,
439 name: None,
440 version: None,
441 homepage_url: None,
442 download_url: None,
443 copyright: None,
444 license_detections: Vec::new(),
445 dependencies: Vec::new(),
446 parties: Vec::new(),
447 purl: None,
448 }
449}