Skip to main content

provenant/parsers/
poetry_lock.rs

1//! Parser for Poetry poetry.lock lockfiles.
2//!
3//! Extracts resolved dependency information from Poetry lockfiles which use TOML format
4//! to store resolved versions and metadata for Python dependencies.
5//!
6//! # Supported Formats
7//! - poetry.lock (TOML-based lockfile with package metadata)
8//!
9//! # Key Features
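//! - Locked packages are reported with `is_direct: Some(false)`; the requirements nested under each one use `is_direct: Some(true)`
//! - Nested requirements carry a scope: `dependencies` for regular requirements, or the name of the extra that declares them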
12//! - Dependency resolution with exact versions
13//! - Package URL (purl) generation for PyPI packages
14//! - Extra dependencies and optional package handling
15//!
16//! # Implementation Notes
17//! - Uses TOML parsing via `toml` crate
18//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
19//! - Graceful error handling with `warn!()` logs
20//! - Integrates with Python parser utilities for PyPI URL building
21
22use std::collections::HashMap;
23use std::path::Path;
24
25use log::warn;
26use packageurl::PackageUrl;
27use toml::Value as TomlValue;
28use toml::map::Map as TomlMap;
29
30use crate::models::{DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage};
31use crate::parsers::python::{build_pypi_urls, read_toml_file};
32
33use super::PackageParser;
34
// Keys looked up in the parsed poetry.lock TOML document.

/// Top-level key holding the array of locked package tables.
const FIELD_PACKAGE: &str = "package";
/// Top-level key holding the lockfile metadata table.
const FIELD_METADATA: &str = "metadata";
/// Package-table key holding the package name.
const FIELD_NAME: &str = "name";
/// Key holding a version string, both in package tables and in table-form requirements.
const FIELD_VERSION: &str = "version";
/// Metadata key whose string value is stored in `extra_data` as `python_version`.
const FIELD_PYTHON_VERSIONS: &str = "python-versions";
/// Package-table key holding the requirements table; also reused as the scope label of those
/// requirements.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Package-table key holding a table of extra names mapped to arrays of requirement strings.
const FIELD_EXTRAS: &str = "extras";
/// Metadata key (string or integer) whose value is stored in `extra_data` as `lock_version`.
const FIELD_LOCK_VERSION: &str = "lock-version";
43
44/// Poetry lockfile parser for poetry.lock files.
45///
46/// Extracts pinned Python package dependencies from Poetry-managed projects.
47pub struct PoetryLockParser;
48
49impl PackageParser for PoetryLockParser {
50    const PACKAGE_TYPE: PackageType = PackageType::Pypi;
51
52    fn is_match(path: &Path) -> bool {
53        path.file_name()
54            .and_then(|name| name.to_str())
55            .map(|name| name == "poetry.lock")
56            .unwrap_or(false)
57    }
58
59    fn extract_packages(path: &Path) -> Vec<PackageData> {
60        let toml_content = match read_toml_file(path) {
61            Ok(content) => content,
62            Err(e) => {
63                warn!("Failed to read poetry.lock at {:?}: {}", path, e);
64                return vec![default_package_data()];
65            }
66        };
67
68        vec![parse_poetry_lock(&toml_content)]
69    }
70}
71
72fn parse_poetry_lock(toml_content: &TomlValue) -> PackageData {
73    let packages = toml_content
74        .get(FIELD_PACKAGE)
75        .and_then(|value| value.as_array())
76        .cloned()
77        .unwrap_or_default();
78
79    let metadata = toml_content
80        .get(FIELD_METADATA)
81        .and_then(|value| value.as_table());
82
83    let mut dependencies = Vec::new();
84    for package in packages {
85        if let Some(package_table) = package.as_table()
86            && let Some(dependency) = build_dependency_from_package(package_table)
87        {
88            dependencies.push(dependency);
89        }
90    }
91
92    PackageData {
93        package_type: Some(PoetryLockParser::PACKAGE_TYPE),
94        namespace: None,
95        name: None,
96        version: None,
97        qualifiers: None,
98        subpath: None,
99        primary_language: Some("Python".to_string()),
100        description: None,
101        release_date: None,
102        parties: Vec::new(),
103        keywords: Vec::new(),
104        homepage_url: None,
105        download_url: None,
106        size: None,
107        sha1: None,
108        md5: None,
109        sha256: None,
110        sha512: None,
111        bug_tracking_url: None,
112        code_view_url: None,
113        vcs_url: None,
114        copyright: None,
115        holder: None,
116        declared_license_expression: None,
117        declared_license_expression_spdx: None,
118        license_detections: Vec::new(),
119        other_license_expression: None,
120        other_license_expression_spdx: None,
121        other_license_detections: Vec::new(),
122        extracted_license_statement: None,
123        notice_text: None,
124        source_packages: Vec::new(),
125        file_references: Vec::new(),
126        is_private: false,
127        is_virtual: false,
128        extra_data: build_metadata_extra_data(metadata),
129        dependencies,
130        repository_homepage_url: None,
131        repository_download_url: None,
132        api_data_url: None,
133        datasource_id: Some(DatasourceId::PypiPoetryLock),
134        purl: None,
135    }
136}
137
138fn build_metadata_extra_data(
139    metadata: Option<&TomlMap<String, TomlValue>>,
140) -> Option<HashMap<String, serde_json::Value>> {
141    let mut extra_data = HashMap::new();
142
143    if let Some(metadata) = metadata {
144        if let Some(python_versions) = metadata
145            .get(FIELD_PYTHON_VERSIONS)
146            .and_then(|value| value.as_str())
147            && !python_versions.is_empty()
148        {
149            extra_data.insert(
150                "python_version".to_string(),
151                serde_json::Value::String(python_versions.to_string()),
152            );
153        }
154
155        if let Some(lock_version) = metadata.get(FIELD_LOCK_VERSION) {
156            let lock_version = lock_version
157                .as_str()
158                .map(|value| value.to_string())
159                .or_else(|| lock_version.as_integer().map(|value| value.to_string()));
160
161            if let Some(lock_version) = lock_version
162                && !lock_version.is_empty()
163            {
164                extra_data.insert(
165                    "lock_version".to_string(),
166                    serde_json::Value::String(lock_version),
167                );
168            }
169        }
170    }
171
172    if extra_data.is_empty() {
173        None
174    } else {
175        Some(extra_data)
176    }
177}
178
179fn build_dependency_from_package(package_table: &TomlMap<String, TomlValue>) -> Option<Dependency> {
180    let name = package_table
181        .get(FIELD_NAME)
182        .and_then(|value| value.as_str())
183        .map(normalize_pypi_name)?;
184
185    let version = package_table
186        .get(FIELD_VERSION)
187        .and_then(|value| value.as_str())
188        .map(|value| value.to_string())?;
189
190    let purl = create_pypi_purl(&name, Some(&version));
191
192    let resolved_package = build_resolved_package(package_table, &name, &version);
193
194    let is_optional = package_table
195        .get("optional")
196        .and_then(|value| value.as_bool())
197        .unwrap_or(false);
198
199    Some(Dependency {
200        purl,
201        extracted_requirement: None,
202        scope: None,
203        is_runtime: Some(!is_optional),
204        is_optional: Some(is_optional),
205        is_pinned: Some(true),
206        is_direct: Some(false),
207        resolved_package: Some(Box::new(resolved_package)),
208        extra_data: None,
209    })
210}
211
212fn build_resolved_package(
213    package_table: &TomlMap<String, TomlValue>,
214    name: &str,
215    version: &str,
216) -> ResolvedPackage {
217    let dependencies = extract_package_dependencies(package_table);
218
219    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
220        build_pypi_urls(Some(name), Some(version));
221
222    // Extract sha256 hash from files array (first file's hash)
223    let sha256 = extract_sha256_from_files(package_table);
224
225    ResolvedPackage {
226        package_type: PoetryLockParser::PACKAGE_TYPE,
227        namespace: String::new(),
228        name: name.to_string(),
229        version: version.to_string(),
230        primary_language: Some("Python".to_string()),
231        download_url: None,
232        sha1: None,
233        sha256,
234        sha512: None,
235        md5: None,
236        is_virtual: true,
237        extra_data: None,
238        dependencies,
239        repository_homepage_url,
240        repository_download_url,
241        api_data_url,
242        datasource_id: Some(DatasourceId::PypiPoetryLock),
243        purl,
244    }
245}
246
247fn extract_package_dependencies(package_table: &TomlMap<String, TomlValue>) -> Vec<Dependency> {
248    let mut dependencies = Vec::new();
249
250    if let Some(dep_table) = package_table
251        .get(FIELD_DEPENDENCIES)
252        .and_then(|value| value.as_table())
253    {
254        for (dep_name, dep_value) in dep_table {
255            if let Some(dependency) = build_dependency_from_table(dep_name, dep_value) {
256                dependencies.push(dependency);
257            }
258        }
259    }
260
261    if let Some(extras_table) = package_table
262        .get(FIELD_EXTRAS)
263        .and_then(|value| value.as_table())
264    {
265        for (extra_name, extra_values) in extras_table {
266            if let Some(extra_list) = extra_values.as_array() {
267                for extra in extra_list {
268                    if let Some(spec) = extra.as_str()
269                        && let Some(dependency) = build_dependency_from_extra(extra_name, spec)
270                    {
271                        dependencies.push(dependency);
272                    }
273                }
274            }
275        }
276    }
277
278    dependencies
279}
280
281fn build_dependency_from_table(dep_name: &str, dep_value: &TomlValue) -> Option<Dependency> {
282    let requirement = match dep_value {
283        TomlValue::String(value) => Some(value.to_string()),
284        TomlValue::Table(table) => table
285            .get(FIELD_VERSION)
286            .and_then(|value| value.as_str())
287            .map(|value| value.to_string()),
288        _ => None,
289    };
290
291    let normalized_name = normalize_pypi_name(dep_name);
292    let purl = create_pypi_purl(&normalized_name, None);
293
294    Some(Dependency {
295        purl,
296        extracted_requirement: requirement,
297        scope: Some(FIELD_DEPENDENCIES.to_string()),
298        is_runtime: Some(true),
299        is_optional: Some(false),
300        is_pinned: Some(false),
301        is_direct: Some(true),
302        resolved_package: None,
303        extra_data: None,
304    })
305}
306
307fn build_dependency_from_extra(extra_name: &str, spec: &str) -> Option<Dependency> {
308    let (name, requirement) = parse_poetry_dependency_spec(spec)?;
309    let purl = create_pypi_purl(&name, None);
310
311    Some(Dependency {
312        purl,
313        extracted_requirement: requirement,
314        scope: Some(extra_name.to_string()),
315        is_runtime: Some(false),
316        is_optional: Some(false),
317        is_pinned: Some(false),
318        is_direct: Some(true),
319        resolved_package: None,
320        extra_data: None,
321    })
322}
323
/// Splits a requirement string such as `name (>=1.0)` into a normalized name and an optional
/// requirement.
///
/// The split happens at the first `" ("`. Everything after it, with surrounding whitespace and
/// any trailing `)` characters removed, becomes the requirement; an empty remainder yields
/// `None` for the requirement. Without a `" ("` the whole trimmed string is taken as the name.
/// Returns `None` for blank input or an empty name.
fn parse_poetry_dependency_spec(spec: &str) -> Option<(String, Option<String>)> {
    let spec = spec.trim();
    if spec.is_empty() {
        return None;
    }

    let Some((raw_name, raw_constraint)) = spec.split_once(" (") else {
        return Some((normalize_pypi_name(spec), None));
    };

    let raw_name = raw_name.trim();
    if raw_name.is_empty() {
        return None;
    }

    let constraint = raw_constraint.trim().trim_end_matches(')').trim();
    let requirement = (!constraint.is_empty()).then(|| constraint.to_string());

    Some((normalize_pypi_name(raw_name), requirement))
}

/// Trims surrounding whitespace from `name` and lowercases its ASCII letters.
fn normalize_pypi_name(name: &str) -> String {
    let mut normalized = name.trim().to_owned();
    normalized.make_ascii_lowercase();
    normalized
}
352
353fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
354    if name.contains('[') || name.contains(']') {
355        return Some(build_manual_pypi_purl(name, version));
356    }
357
358    if let Ok(mut purl) = PackageUrl::new(PoetryLockParser::PACKAGE_TYPE.as_str(), name) {
359        if let Some(version) = version
360            && purl.with_version(version).is_err()
361        {
362            return None;
363        }
364        return Some(purl.to_string());
365    }
366
367    Some(build_manual_pypi_purl(name, version))
368}
369
/// Formats a PyPI package URL by hand: `pkg:pypi/<encoded name>`, followed by `@<version>`
/// when a non-empty version is supplied. The version is appended verbatim.
fn build_manual_pypi_purl(name: &str, version: Option<&str>) -> String {
    let mut purl = format!("pkg:pypi/{}", encode_pypi_name(name));

    match version {
        Some(pinned) if !pinned.is_empty() => {
            purl.push('@');
            purl.push_str(pinned);
        }
        _ => {}
    }

    purl
}

/// Percent-encodes square brackets in `name` (`[` becomes `%5b`, `]` becomes `%5d`); every
/// other character is copied unchanged.
fn encode_pypi_name(name: &str) -> String {
    let mut encoded = String::with_capacity(name.len());
    for ch in name.chars() {
        match ch {
            '[' => encoded.push_str("%5b"),
            ']' => encoded.push_str("%5d"),
            other => encoded.push(other),
        }
    }
    encoded
}
385
386fn extract_sha256_from_files(package_table: &TomlMap<String, TomlValue>) -> Option<String> {
387    package_table
388        .get("files")
389        .and_then(|files| files.as_array())
390        .and_then(|files_array| files_array.first())
391        .and_then(|first_file| first_file.as_table())
392        .and_then(|file_table| file_table.get("hash"))
393        .and_then(|hash_value| hash_value.as_str())
394        .and_then(|hash_str| hash_str.strip_prefix("sha256:").map(|s| s.to_string()))
395}
396
397fn default_package_data() -> PackageData {
398    PackageData {
399        package_type: Some(PoetryLockParser::PACKAGE_TYPE),
400        primary_language: Some("Python".to_string()),
401        datasource_id: Some(DatasourceId::PypiPoetryLock),
402        ..Default::default()
403    }
404}
405
// Registers the Poetry lockfile parser through the crate's `register_parser!` macro.
// NOTE(review): the macro definition is not visible here; the arguments presumably are, in
// order, a human-readable description, the path glob(s) handled, the package type, the primary
// language, and an optional documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Poetry lockfile",
    &["**/poetry.lock"],
    "pypi",
    "Python",
    Some("https://python-poetry.org/docs/basic-usage/#installing-with-poetrylock"),
);