1use crate::models::{Dependency, LicenseDetection, Match, PackageData, Party};
2use log::warn;
3use packageurl::PackageUrl;
4use std::fs::File;
5use std::io::Read;
6use std::path::Path;
7use toml::Value as TomlValue;
8use toml::map::Map as TomlMap;
9
10use super::PackageParser;
11
// Keys looked up in the parsed `pyproject.toml` document.

/// Name of the table that holds the project metadata.
const FIELD_PROJECT: &str = "project";

// Scalar metadata keys.
/// Key of the package name.
const FIELD_NAME: &str = "name";
/// Key of the package version.
const FIELD_VERSION: &str = "version";
/// Key of the license declaration (read as a string or a table).
const FIELD_LICENSE: &str = "license";

// People.
/// Key of the authors array.
const FIELD_AUTHORS: &str = "authors";
/// Key of the maintainers array.
const FIELD_MAINTAINERS: &str = "maintainers";

// Links.
/// Key of the table that maps labels to URLs.
const FIELD_URLS: &str = "urls";
/// Key used for the homepage URL.
const FIELD_HOMEPAGE: &str = "homepage";
/// Key used for the repository URL.
const FIELD_REPOSITORY: &str = "repository";

// Requirements.
/// Key of the array of dependency strings.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Key of the table that maps a group name to an array of dependency strings.
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
24
25pub struct PythonParser;
26
27impl PackageParser for PythonParser {
28 const PACKAGE_TYPE: &'static str = "pypi";
29
30 fn extract_package_data(path: &Path) -> PackageData {
31 if path.file_name().unwrap_or_default() == "pyproject.toml" {
32 extract_from_pyproject_toml(path)
33 } else if path.file_name().unwrap_or_default() == "setup.py" {
34 extract_from_setup_py(path)
35 } else {
36 default_package_data()
37 }
38 }
39
40 fn is_match(path: &Path) -> bool {
41 if let Some(filename) = path.file_name() {
42 filename == "pyproject.toml" || filename == "setup.py"
43 } else {
44 false
45 }
46 }
47}
48
49fn extract_from_pyproject_toml(path: &Path) -> PackageData {
50 let toml_content = match read_toml_file(path) {
51 Ok(content) => content,
52 Err(e) => {
53 warn!("Failed to read or parse pyproject.toml at {:?}: {}", path, e);
54 return default_package_data();
55 }
56 };
57
58 let project_table = if let Some(project) = toml_content.get(FIELD_PROJECT).and_then(|v| v.as_table()) {
60 project.clone()
62 } else if toml_content.get(FIELD_NAME).is_some() {
63 match toml_content.as_table() {
65 Some(table) => table.clone(),
66 None => {
67 warn!("Failed to convert TOML content to table in {:?}", path);
68 return default_package_data();
69 }
70 }
71 } else {
72 warn!("No project data found in pyproject.toml at {:?}", path);
73 return default_package_data();
74 };
75
76 let name = project_table
77 .get(FIELD_NAME)
78 .and_then(|v| v.as_str())
79 .map(String::from);
80
81 let version = project_table
82 .get(FIELD_VERSION)
83 .and_then(|v| v.as_str())
84 .map(String::from);
85
86 let license_detections = extract_license_info(&project_table);
87
88 let (homepage_url, repository_url) = extract_urls(&project_table);
90
91 let (dependencies, optional_dependencies) = extract_dependencies(&project_table);
92
93 let purl = name.as_ref().map(|n| {
95 let mut package_url =
96 PackageUrl::new(PythonParser::PACKAGE_TYPE, n).expect("Failed to create PackageUrl");
97
98 if let Some(v) = &version {
99 package_url.with_version(v);
100 }
101
102 package_url.to_string()
103 });
104
105 PackageData {
106 package_type: Some(PythonParser::PACKAGE_TYPE.to_string()),
107 namespace: None, name,
109 version,
110 homepage_url,
111 download_url: repository_url,
112 copyright: None,
113 license_detections,
114 dependencies: [dependencies, optional_dependencies].concat(),
115 parties: extract_parties(&project_table),
116 purl,
117 }
118}
119
120fn extract_license_info(project: &TomlMap<String, TomlValue>) -> Vec<LicenseDetection> {
121 let mut detections = Vec::new();
122
123 if let Some(license_value) = project.get(FIELD_LICENSE) {
125 match license_value {
126 TomlValue::String(license_str) => {
127 detections.push(create_license_detection(license_str));
128 }
129 TomlValue::Table(license_table) => {
130 if let Some(text) = license_table.get("text").and_then(|v| v.as_str()) {
131 detections.push(create_license_detection(text));
132 }
133 if let Some(expr) = license_table.get("expression").and_then(|v| v.as_str()) {
134 detections.push(create_license_detection(expr));
135 }
136 }
137 _ => {}
138 }
139 }
140
141 detections
142}
143
144fn create_license_detection(license_str: &str) -> LicenseDetection {
145 LicenseDetection {
146 license_expression: license_str.to_string(),
147 matches: vec![Match {
148 score: 100.0,
149 start_line: 0, end_line: 0,
151 license_expression: license_str.to_string(),
152 rule_identifier: None,
153 matched_text: None,
154 }],
155 }
156}
157
158fn extract_urls(
159 project: &TomlMap<String, TomlValue>,
160) -> (Option<String>, Option<String>) {
161 let mut homepage_url = None;
162 let mut repository_url = None;
163
164 if let Some(urls) = project.get(FIELD_URLS).and_then(|v| v.as_table()) {
166 homepage_url = urls
167 .get(FIELD_HOMEPAGE)
168 .and_then(|v| v.as_str())
169 .map(String::from);
170 repository_url = urls
171 .get(FIELD_REPOSITORY)
172 .and_then(|v| v.as_str())
173 .map(String::from);
174 }
175
176 if homepage_url.is_none() {
178 homepage_url = project
179 .get(FIELD_HOMEPAGE)
180 .and_then(|v| v.as_str())
181 .map(String::from);
182 }
183
184 if repository_url.is_none() {
185 repository_url = project
186 .get(FIELD_REPOSITORY)
187 .and_then(|v| v.as_str())
188 .map(String::from);
189 }
190
191 (homepage_url, repository_url)
192}
193
194fn extract_parties(project: &TomlMap<String, TomlValue>) -> Vec<Party> {
195 let mut parties = Vec::new();
196
197 if let Some(authors) = project.get(FIELD_AUTHORS).and_then(|v| v.as_array()) {
199 for author in authors {
200 if let Some(author_str) = author.as_str() {
201 extract_email_from_author_string(author_str)
202 .map(|email| parties.push(Party { email }));
203 }
204 }
205 }
206
207 if let Some(maintainers) = project.get(FIELD_MAINTAINERS).and_then(|v| v.as_array()) {
209 for maintainer in maintainers {
210 if let Some(maintainer_str) = maintainer.as_str() {
211 extract_email_from_author_string(maintainer_str)
212 .map(|email| parties.push(Party { email }));
213 }
214 }
215 }
216
217 parties
218}
219
/// Extracts the address from a `"Name <email>"` style string.
///
/// The address is the text between the first `<` and the first `>` that
/// follows it, with surrounding whitespace removed. Returns `None` when there
/// is no such bracket pair or when the brackets enclose nothing.
fn extract_email_from_author_string(author_str: &str) -> Option<String> {
    let open = author_str.find('<')?;
    // Search for the closing bracket only after the opening one, so a stray
    // `>` earlier in the name does not hide a valid address.
    let after_open = &author_str[open + 1..];
    let close = after_open.find('>')?;

    let email = after_open[..close].trim();
    if email.is_empty() {
        None
    } else {
        Some(email.to_string())
    }
}
232
233fn extract_dependencies(
234 project: &TomlMap<String, TomlValue>,
235) -> (Vec<Dependency>, Vec<Dependency>) {
236 let mut dependencies = Vec::new();
237 let mut optional_dependencies = Vec::new();
238
239 if let Some(deps) = project.get(FIELD_DEPENDENCIES).and_then(|v| v.as_array()) {
241 dependencies = parse_dependency_array(deps, false);
242 }
243
244 if let Some(opt_deps_table) = project
246 .get(FIELD_OPTIONAL_DEPENDENCIES)
247 .and_then(|v| v.as_table())
248 {
249 for (_feature, deps) in opt_deps_table {
250 if let Some(deps_array) = deps.as_array() {
251 optional_dependencies.extend(parse_dependency_array(deps_array, true));
252 }
253 }
254 }
255
256 (dependencies, optional_dependencies)
257}
258
259fn parse_dependency_array(array: &[TomlValue], is_optional: bool) -> Vec<Dependency> {
260 array
261 .iter()
262 .filter_map(|dep| {
263 let dep_str = dep.as_str()?;
264
265 let mut parts = dep_str.split(|c| c == '>' || c == '=' || c == '<' || c == '~');
268 let name = parts.next()?.trim().to_string();
269
270 let version = parts.next().map(|v| v.trim().to_string());
272
273 let mut package_url = match PackageUrl::new(PythonParser::PACKAGE_TYPE, &name) {
274 Ok(purl) => purl,
275 Err(_) => return None,
276 };
277
278 if let Some(ref v) = version {
279 package_url.with_version(v);
280 }
281
282 Some(Dependency {
283 purl: Some(package_url.to_string()),
284 scope: None,
285 is_optional,
286 })
287 })
288 .collect()
289}
290
291fn extract_from_setup_py(path: &Path) -> PackageData {
292 let content = match read_file_to_string(path) {
296 Ok(content) => content,
297 Err(e) => {
298 warn!("Failed to read setup.py at {:?}: {}", path, e);
299 return default_package_data();
300 }
301 };
302
303 let name = extract_setup_value(&content, "name");
304 let version = extract_setup_value(&content, "version");
305 let license_expression = extract_setup_value(&content, "license");
306
307 let license_detections = license_expression.as_ref().map_or(Vec::new(), |license| {
309 vec![LicenseDetection {
310 license_expression: license.clone(),
311 matches: vec![Match {
312 score: 100.0,
313 start_line: 0, end_line: 0,
315 license_expression: license.clone(),
316 rule_identifier: None,
317 matched_text: None,
318 }],
319 }]
320 });
321
322 let purl = name.as_ref().map(|n| {
324 let mut package_url =
325 PackageUrl::new(PythonParser::PACKAGE_TYPE, n).expect("Failed to create PackageUrl");
326
327 if let Some(v) = &version {
328 package_url.with_version(v);
329 }
330
331 package_url.to_string()
332 });
333
334 PackageData {
335 package_type: Some(PythonParser::PACKAGE_TYPE.to_string()),
336 namespace: None,
337 name,
338 version,
339 homepage_url: extract_setup_value(&content, "url"),
340 download_url: None,
341 copyright: None,
342 license_detections,
343 dependencies: Vec::new(), parties: Vec::new(), purl,
346 }
347}
348
/// Finds the first `key = "value"` or `key = 'value'` assignment in `content`
/// and returns the quoted value.
///
/// * `key` must appear as a whole identifier: looking up `url` does not match
///   `download_url`.
/// * Any amount of whitespace is allowed around `=`.
/// * The value ends at the next occurrence of the quote character that opened
///   it, so `"O'Reilly"` is returned intact.
///
/// Occurrences whose right-hand side is not a quoted literal (for example
/// `name=NAME`) are skipped. Returns `None` when no occurrence qualifies.
fn extract_setup_value(content: &str, key: &str) -> Option<String> {
    fn is_identifier_char(c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

    content.match_indices(key).find_map(|(start, _)| {
        // Reject matches that are only the tail of a longer identifier.
        let preceded_by_identifier = content[..start]
            .chars()
            .next_back()
            .map_or(false, is_identifier_char);
        if preceded_by_identifier {
            return None;
        }

        let after_key = content[start + key.len()..].trim_start();
        let after_equals = after_key.strip_prefix('=')?.trim_start();

        // Only quoted literals are supported; remember which quote opened the
        // value so the other quote character may appear inside it.
        let quote = after_equals
            .chars()
            .next()
            .filter(|c| *c == '"' || *c == '\'')?;
        let value = &after_equals[quote.len_utf8()..];

        value.find(quote).map(|end| value[..end].to_string())
    })
}
377
378fn read_toml_file(path: &Path) -> Result<TomlValue, String> {
380 let content = read_file_to_string(path)?;
381 toml::from_str(&content).map_err(|e| format!("Failed to parse TOML: {}", e))
382}
383
/// Reads the whole file at `path` into a `String`.
///
/// Errors are reported as text: `"Failed to open file: ..."` when the file
/// cannot be opened and `"Error reading file: ..."` when reading it fails.
fn read_file_to_string(path: &Path) -> Result<String, String> {
    let mut handle = match File::open(path) {
        Ok(opened) => opened,
        Err(open_error) => return Err(format!("Failed to open file: {}", open_error)),
    };

    let mut buffer = String::new();
    match handle.read_to_string(&mut buffer) {
        Ok(_) => Ok(buffer),
        Err(read_error) => Err(format!("Error reading file: {}", read_error)),
    }
}
391
392fn default_package_data() -> PackageData {
393 PackageData {
394 package_type: None,
395 namespace: None,
396 name: None,
397 version: None,
398 homepage_url: None,
399 download_url: None,
400 copyright: None,
401 license_detections: Vec::new(),
402 dependencies: Vec::new(),
403 parties: Vec::new(),
404 purl: None,
405 }
406}