Skip to main content

provenant/parsers/
poetry_lock.rs

1//! Parser for Poetry poetry.lock lockfiles.
2//!
3//! Extracts resolved dependency information from Poetry lockfiles which use TOML format
4//! to store resolved versions and metadata for Python dependencies.
5//!
6//! # Supported Formats
7//! - poetry.lock (TOML-based lockfile with package metadata)
8//!
9//! # Key Features
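//! - Each locked package is emitted with `is_direct: Some(false)`; its own requirements are
//!   nested under the resolved package with `is_direct: Some(true)`
//! - Nested requirements carry a `scope` of `dependencies`, or the extra's name for entries
//!   read from `[package.extras]`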
12//! - Dependency resolution with exact versions
13//! - Package URL (purl) generation for PyPI packages
14//! - Extra dependencies and optional package handling
15//!
16//! # Implementation Notes
17//! - Uses TOML parsing via `toml` crate
18//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
19//! - Graceful error handling with `warn!()` logs
20//! - Integrates with Python parser utilities for PyPI URL building
21
22use std::collections::HashMap;
23use std::path::Path;
24
25use crate::parser_warn as warn;
26use packageurl::PackageUrl;
27use toml::Value as TomlValue;
28use toml::map::Map as TomlMap;
29
30use crate::models::{
31    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
32};
33use crate::parsers::python::{build_pypi_urls, read_toml_file};
34
35use super::PackageParser;
36
// Keys looked up in the parsed `poetry.lock` TOML document.

/// Top-level key holding the array of locked package tables.
const FIELD_PACKAGE: &str = "package";
/// Top-level key holding the lockfile metadata table.
const FIELD_METADATA: &str = "metadata";
/// Package-table key for the package name.
const FIELD_NAME: &str = "name";
/// Key for a version string; read from package tables and from table-form dependency entries.
const FIELD_VERSION: &str = "version";
/// Metadata key whose value is exported as `python_version` in `extra_data`.
const FIELD_PYTHON_VERSIONS: &str = "python-versions";
/// Package-table key for the dependency table; also reused as the scope label of those entries.
const FIELD_DEPENDENCIES: &str = "dependencies";
/// Package-table key for the extras table.
const FIELD_EXTRAS: &str = "extras";
/// Metadata key whose value is exported as `lock_version` in `extra_data`.
const FIELD_LOCK_VERSION: &str = "lock-version";
45
/// Poetry lockfile parser for `poetry.lock` files.
///
/// Extracts pinned Python package dependencies from Poetry-managed projects.
/// This is a stateless unit struct; the parsing entry points are the associated
/// functions of its [`PackageParser`] implementation.
pub struct PoetryLockParser;
50
51impl PackageParser for PoetryLockParser {
52    const PACKAGE_TYPE: PackageType = PackageType::Pypi;
53
54    fn is_match(path: &Path) -> bool {
55        path.file_name()
56            .and_then(|name| name.to_str())
57            .map(|name| name == "poetry.lock")
58            .unwrap_or(false)
59    }
60
61    fn extract_packages(path: &Path) -> Vec<PackageData> {
62        let toml_content = match read_toml_file(path) {
63            Ok(content) => content,
64            Err(e) => {
65                warn!("Failed to read poetry.lock at {:?}: {}", path, e);
66                return vec![default_package_data()];
67            }
68        };
69
70        vec![parse_poetry_lock(&toml_content)]
71    }
72}
73
74fn parse_poetry_lock(toml_content: &TomlValue) -> PackageData {
75    let packages = toml_content
76        .get(FIELD_PACKAGE)
77        .and_then(|value| value.as_array())
78        .cloned()
79        .unwrap_or_default();
80
81    let metadata = toml_content
82        .get(FIELD_METADATA)
83        .and_then(|value| value.as_table());
84
85    let mut dependencies = Vec::new();
86    for package in packages {
87        if let Some(package_table) = package.as_table()
88            && let Some(dependency) = build_dependency_from_package(package_table)
89        {
90            dependencies.push(dependency);
91        }
92    }
93
94    PackageData {
95        package_type: Some(PoetryLockParser::PACKAGE_TYPE),
96        namespace: None,
97        name: None,
98        version: None,
99        qualifiers: None,
100        subpath: None,
101        primary_language: Some("Python".to_string()),
102        description: None,
103        release_date: None,
104        parties: Vec::new(),
105        keywords: Vec::new(),
106        homepage_url: None,
107        download_url: None,
108        size: None,
109        sha1: None,
110        md5: None,
111        sha256: None,
112        sha512: None,
113        bug_tracking_url: None,
114        code_view_url: None,
115        vcs_url: None,
116        copyright: None,
117        holder: None,
118        declared_license_expression: None,
119        declared_license_expression_spdx: None,
120        license_detections: Vec::new(),
121        other_license_expression: None,
122        other_license_expression_spdx: None,
123        other_license_detections: Vec::new(),
124        extracted_license_statement: None,
125        notice_text: None,
126        source_packages: Vec::new(),
127        file_references: Vec::new(),
128        is_private: false,
129        is_virtual: false,
130        extra_data: build_metadata_extra_data(metadata),
131        dependencies,
132        repository_homepage_url: None,
133        repository_download_url: None,
134        api_data_url: None,
135        datasource_id: Some(DatasourceId::PypiPoetryLock),
136        purl: None,
137    }
138}
139
140fn build_metadata_extra_data(
141    metadata: Option<&TomlMap<String, TomlValue>>,
142) -> Option<HashMap<String, serde_json::Value>> {
143    let mut extra_data = HashMap::new();
144
145    if let Some(metadata) = metadata {
146        if let Some(python_versions) = metadata
147            .get(FIELD_PYTHON_VERSIONS)
148            .and_then(|value| value.as_str())
149            && !python_versions.is_empty()
150        {
151            extra_data.insert(
152                "python_version".to_string(),
153                serde_json::Value::String(python_versions.to_string()),
154            );
155        }
156
157        if let Some(lock_version) = metadata.get(FIELD_LOCK_VERSION) {
158            let lock_version = lock_version
159                .as_str()
160                .map(|value| value.to_string())
161                .or_else(|| lock_version.as_integer().map(|value| value.to_string()));
162
163            if let Some(lock_version) = lock_version
164                && !lock_version.is_empty()
165            {
166                extra_data.insert(
167                    "lock_version".to_string(),
168                    serde_json::Value::String(lock_version),
169                );
170            }
171        }
172    }
173
174    if extra_data.is_empty() {
175        None
176    } else {
177        Some(extra_data)
178    }
179}
180
181fn build_dependency_from_package(package_table: &TomlMap<String, TomlValue>) -> Option<Dependency> {
182    let name = package_table
183        .get(FIELD_NAME)
184        .and_then(|value| value.as_str())
185        .map(normalize_pypi_name)?;
186
187    let version = package_table
188        .get(FIELD_VERSION)
189        .and_then(|value| value.as_str())
190        .map(|value| value.to_string())?;
191
192    let purl = create_pypi_purl(&name, Some(&version));
193
194    let resolved_package = build_resolved_package(package_table, &name, &version);
195
196    let poetry_optional = package_table
197        .get("optional")
198        .and_then(|value| value.as_bool())
199        .unwrap_or(false);
200
201    let extra_data = Some(HashMap::from([(
202        "poetry_optional".to_string(),
203        serde_json::Value::Bool(poetry_optional),
204    )]));
205
206    Some(Dependency {
207        purl,
208        extracted_requirement: None,
209        scope: None,
210        is_runtime: None,
211        is_optional: None,
212        is_pinned: Some(true),
213        is_direct: Some(false),
214        resolved_package: Some(Box::new(resolved_package)),
215        extra_data,
216    })
217}
218
219fn build_resolved_package(
220    package_table: &TomlMap<String, TomlValue>,
221    name: &str,
222    version: &str,
223) -> ResolvedPackage {
224    let dependencies = extract_package_dependencies(package_table);
225
226    let (repository_homepage_url, repository_download_url, api_data_url, purl) =
227        build_pypi_urls(Some(name), Some(version));
228
229    // Extract sha256 hash from files array (first file's hash)
230    let sha256 = extract_sha256_from_files(package_table);
231
232    ResolvedPackage {
233        primary_language: Some("Python".to_string()),
234        download_url: None,
235        sha1: None,
236        sha256: sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
237        sha512: None,
238        md5: None,
239        is_virtual: true,
240        extra_data: None,
241        dependencies,
242        repository_homepage_url,
243        repository_download_url,
244        api_data_url,
245        datasource_id: Some(DatasourceId::PypiPoetryLock),
246        purl,
247        ..ResolvedPackage::new(
248            PoetryLockParser::PACKAGE_TYPE,
249            String::new(),
250            name.to_string(),
251            version.to_string(),
252        )
253    }
254}
255
256fn extract_package_dependencies(package_table: &TomlMap<String, TomlValue>) -> Vec<Dependency> {
257    let mut dependencies = Vec::new();
258
259    if let Some(dep_table) = package_table
260        .get(FIELD_DEPENDENCIES)
261        .and_then(|value| value.as_table())
262    {
263        for (dep_name, dep_value) in dep_table {
264            if let Some(dependency) = build_dependency_from_table(dep_name, dep_value) {
265                dependencies.push(dependency);
266            }
267        }
268    }
269
270    if let Some(extras_table) = package_table
271        .get(FIELD_EXTRAS)
272        .and_then(|value| value.as_table())
273    {
274        for (extra_name, extra_values) in extras_table {
275            if let Some(extra_list) = extra_values.as_array() {
276                for extra in extra_list {
277                    if let Some(spec) = extra.as_str()
278                        && let Some(dependency) = build_dependency_from_extra(extra_name, spec)
279                    {
280                        dependencies.push(dependency);
281                    }
282                }
283            }
284        }
285    }
286
287    dependencies
288}
289
290fn build_dependency_from_table(dep_name: &str, dep_value: &TomlValue) -> Option<Dependency> {
291    let (requirement, is_optional) = match dep_value {
292        TomlValue::String(value) => (Some(value.to_string()), false),
293        TomlValue::Table(table) => (
294            table
295                .get(FIELD_VERSION)
296                .and_then(|value| value.as_str())
297                .map(|value| value.to_string()),
298            table
299                .get("optional")
300                .and_then(|value| value.as_bool())
301                .unwrap_or(false),
302        ),
303        _ => (None, false),
304    };
305
306    let normalized_name = normalize_pypi_name(dep_name);
307    let purl = create_pypi_purl(&normalized_name, None);
308
309    Some(Dependency {
310        purl,
311        extracted_requirement: requirement,
312        scope: Some(FIELD_DEPENDENCIES.to_string()),
313        is_runtime: Some(true),
314        is_optional: Some(is_optional),
315        is_pinned: Some(false),
316        is_direct: Some(true),
317        resolved_package: None,
318        extra_data: None,
319    })
320}
321
322fn build_dependency_from_extra(extra_name: &str, spec: &str) -> Option<Dependency> {
323    let (name, requirement) = parse_poetry_dependency_spec(spec)?;
324    let purl = create_pypi_purl(&name, None);
325
326    Some(Dependency {
327        purl,
328        extracted_requirement: requirement,
329        scope: Some(extra_name.to_string()),
330        is_runtime: None,
331        is_optional: Some(true),
332        is_pinned: Some(false),
333        is_direct: Some(true),
334        resolved_package: None,
335        extra_data: None,
336    })
337}
338
/// Splits one extras spec string into a normalized package name and an optional
/// version requirement.
///
/// Accepted shapes are `name` and `name (constraint)`, each optionally followed by
/// an environment marker introduced by `;`, for example
/// `mypy (>=1.1.1) ; python_version >= "3.8"`. The marker is cut off before parsing
/// so that it cannot end up inside the package name or the requirement; the marker
/// text itself is not returned.
///
/// Returns `None` when no package name remains (blank input, or input that
/// consists only of a marker). The requirement is `None` when there is no
/// parenthesized constraint or the constraint is empty.
fn parse_poetry_dependency_spec(spec: &str) -> Option<(String, Option<String>)> {
    // Everything from the first `;` onwards is an environment marker, not part of
    // the name or the version constraint.
    let requirement_spec = spec
        .split_once(';')
        .map_or(spec, |(before_marker, _)| before_marker)
        .trim();
    if requirement_spec.is_empty() {
        return None;
    }

    let Some((name_part, constraint_part)) = requirement_spec.split_once(" (") else {
        return Some((normalize_pypi_name(requirement_spec), None));
    };

    let name_part = name_part.trim();
    if name_part.is_empty() {
        return None;
    }

    // Drop the closing parenthesis that wraps the constraint.
    let constraint = constraint_part.trim().trim_end_matches(')').trim();
    let requirement = (!constraint.is_empty()).then(|| constraint.to_string());

    Some((normalize_pypi_name(name_part), requirement))
}

/// Trims surrounding whitespace from a package name and lowercases its ASCII letters.
fn normalize_pypi_name(name: &str) -> String {
    name.trim().to_ascii_lowercase()
}
367
368fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
369    if name.contains('[') || name.contains(']') {
370        return Some(build_manual_pypi_purl(name, version));
371    }
372
373    if let Ok(mut purl) = PackageUrl::new(PoetryLockParser::PACKAGE_TYPE.as_str(), name) {
374        if let Some(version) = version
375            && purl.with_version(version).is_err()
376        {
377            return None;
378        }
379        return Some(purl.to_string());
380    }
381
382    Some(build_manual_pypi_purl(name, version))
383}
384
/// Formats a `pkg:pypi/...` string by hand, without going through `PackageUrl`.
///
/// Square brackets in `name` are percent-encoded; `@version` is appended only for
/// a non-empty version. The version itself is appended verbatim.
fn build_manual_pypi_purl(name: &str, version: Option<&str>) -> String {
    let base = format!("pkg:pypi/{}", encode_pypi_name(name));
    match version {
        Some(version) if !version.is_empty() => format!("{}@{}", base, version),
        _ => base,
    }
}

/// Replaces `[` with `%5b` and `]` with `%5d`; every other character is kept as is.
fn encode_pypi_name(name: &str) -> String {
    let mut encoded = String::with_capacity(name.len());
    for ch in name.chars() {
        match ch {
            '[' => encoded.push_str("%5b"),
            ']' => encoded.push_str("%5d"),
            other => encoded.push(other),
        }
    }
    encoded
}
400
401fn extract_sha256_from_files(package_table: &TomlMap<String, TomlValue>) -> Option<String> {
402    package_table
403        .get("files")
404        .and_then(|files| files.as_array())
405        .and_then(|files_array| files_array.first())
406        .and_then(|first_file| first_file.as_table())
407        .and_then(|file_table| file_table.get("hash"))
408        .and_then(|hash_value| hash_value.as_str())
409        .and_then(|hash_str| hash_str.strip_prefix("sha256:").map(|s| s.to_string()))
410}
411
412fn default_package_data() -> PackageData {
413    PackageData {
414        package_type: Some(PoetryLockParser::PACKAGE_TYPE),
415        primary_language: Some("Python".to_string()),
416        datasource_id: Some(DatasourceId::PypiPoetryLock),
417        ..Default::default()
418    }
419}
420
// Registers descriptive metadata for this parser. The arguments appear to be, in
// order: display name, matching path globs, package type, primary language, and a
// documentation URL — TODO confirm against the `register_parser!` definition.
crate::register_parser!(
    "Poetry lockfile",
    &["**/poetry.lock"],
    "pypi",
    "Python",
    Some("https://python-poetry.org/docs/basic-usage/#installing-with-poetrylock"),
);