Skip to main content

provenant/parsers/
pipfile_lock.rs

//! Parser for Pipenv lockfiles (`Pipfile.lock`) and manifests (`Pipfile`).
//!
//! Extracts resolved dependency information from Pipfile.lock files, which store
//! locked dependency versions for Python projects using pipenv, and declared
//! dependency requirements from the accompanying Pipfile manifest.
//!
//! # Supported Formats
//! - Pipfile.lock (JSON-based lockfile with per-environment dependency sections)
//! - Pipfile (TOML manifest with `packages` and `dev-packages` tables)
//!
//! # Key Features
//! - Dependency extraction from default and develop sections
//! - Direct dependency tracking (top-level locks are direct)
//! - Exact version resolution; the lockfile's `_meta.hash.sha256` digest is recorded
//!   (hashes are extracted, not verified)
//! - Package URL (purl) generation for PyPI packages
//! - Markers and extras dependency handling
//!
//! # Implementation Notes
//! - Pipfile.lock is parsed as JSON via `serde_json`; Pipfile is parsed as TOML
//! - All lockfile versions are pinned (`is_pinned: Some(true)`)
//! - Graceful error handling with `warn!()` logs
//! - Integrates with Python parser utilities for PyPI URL building
21
22use std::collections::HashMap;
23use std::fs;
24use std::path::Path;
25
26use log::warn;
27use packageurl::PackageUrl;
28use serde_json::Value as JsonValue;
29use toml::Value as TomlValue;
30use toml::map::Map as TomlMap;
31
32use crate::models::{DatasourceId, Dependency, PackageData, PackageType};
33use crate::parsers::python::read_toml_file;
34
35use super::PackageParser;
36
// Keys looked up in the JSON document of a `Pipfile.lock`.
// `FIELD_VERSION` is also used when reading table-style entries of a `Pipfile`.
const FIELD_META: &str = "_meta";
const FIELD_HASH: &str = "hash";
const FIELD_SHA256: &str = "sha256";
const FIELD_DEFAULT: &str = "default";
const FIELD_DEVELOP: &str = "develop";
const FIELD_VERSION: &str = "version";
const FIELD_HASHES: &str = "hashes";

// Keys looked up in the TOML document of a `Pipfile`.
const FIELD_PACKAGES: &str = "packages";
const FIELD_DEV_PACKAGES: &str = "dev-packages";
const FIELD_REQUIRES: &str = "requires";
const FIELD_SOURCE: &str = "source";
const FIELD_PYTHON_VERSION: &str = "python_version";
50
51/// Pipenv lockfile and manifest parser for Pipfile.lock and Pipfile files.
52///
53/// Extracts Python package dependencies from Pipenv-managed projects, supporting
54/// both locked versions (Pipfile.lock) and declared dependencies (Pipfile).
55pub struct PipfileLockParser;
56
57impl PackageParser for PipfileLockParser {
58    const PACKAGE_TYPE: PackageType = PackageType::Pypi;
59
60    fn is_match(path: &Path) -> bool {
61        path.file_name()
62            .and_then(|name| name.to_str())
63            .map(|name| name == "Pipfile.lock" || name == "Pipfile")
64            .unwrap_or(false)
65    }
66
67    fn extract_packages(path: &Path) -> Vec<PackageData> {
68        vec![match path.file_name().and_then(|name| name.to_str()) {
69            Some("Pipfile.lock") => extract_from_pipfile_lock(path),
70            Some("Pipfile") => extract_from_pipfile(path),
71            _ => default_package_data(None),
72        }]
73    }
74}
75
76fn extract_from_pipfile_lock(path: &Path) -> PackageData {
77    let content = match fs::read_to_string(path) {
78        Ok(content) => content,
79        Err(e) => {
80            warn!("Failed to read Pipfile.lock at {:?}: {}", path, e);
81            return default_package_data(Some(DatasourceId::PipfileLock));
82        }
83    };
84
85    let json_content: JsonValue = match serde_json::from_str(&content) {
86        Ok(content) => content,
87        Err(e) => {
88            warn!("Failed to parse Pipfile.lock at {:?}: {}", path, e);
89            return default_package_data(Some(DatasourceId::PipfileLock));
90        }
91    };
92
93    parse_pipfile_lock(&json_content)
94}
95
96fn parse_pipfile_lock(json_content: &JsonValue) -> PackageData {
97    let mut package_data = default_package_data(Some(DatasourceId::PipfileLock));
98    package_data.sha256 = extract_lockfile_sha256(json_content);
99
100    let meta = json_content
101        .get(FIELD_META)
102        .and_then(|value| value.as_object());
103    let pipfile_spec = meta.and_then(|value| value.get("pipfile-spec"));
104    let sources = meta.and_then(|value| value.get("sources"));
105    let requires = meta.and_then(|value| value.get("requires"));
106    let _ = (pipfile_spec, sources, requires);
107
108    let default_deps = extract_lockfile_dependencies(json_content, FIELD_DEFAULT, "install", true);
109    let develop_deps = extract_lockfile_dependencies(json_content, FIELD_DEVELOP, "develop", false);
110    package_data.dependencies = [default_deps, develop_deps].concat();
111
112    package_data
113}
114
115fn extract_lockfile_sha256(json_content: &JsonValue) -> Option<String> {
116    json_content
117        .get(FIELD_META)
118        .and_then(|meta| meta.get(FIELD_HASH))
119        .and_then(|hash| hash.get(FIELD_SHA256))
120        .and_then(|value| value.as_str())
121        .map(|value| value.to_string())
122}
123
124fn extract_lockfile_dependencies(
125    json_content: &JsonValue,
126    section: &str,
127    scope: &str,
128    is_runtime: bool,
129) -> Vec<Dependency> {
130    let mut dependencies = Vec::new();
131
132    if let Some(section_map) = json_content
133        .get(section)
134        .and_then(|value| value.as_object())
135    {
136        for (name, value) in section_map {
137            if let Some(dependency) = build_lockfile_dependency(name, value, scope, is_runtime) {
138                dependencies.push(dependency);
139            }
140        }
141    }
142
143    dependencies
144}
145
146fn build_lockfile_dependency(
147    name: &str,
148    value: &JsonValue,
149    scope: &str,
150    is_runtime: bool,
151) -> Option<Dependency> {
152    let normalized_name = normalize_pypi_name(name);
153    let requirement = extract_lockfile_requirement(value)?;
154    let version = strip_pipfile_lock_version(&requirement);
155    let purl = create_pypi_purl(&normalized_name, version.as_deref());
156
157    let _hashes = extract_lockfile_hashes(value);
158
159    Some(Dependency {
160        purl,
161        extracted_requirement: Some(requirement),
162        scope: Some(scope.to_string()),
163        is_runtime: Some(is_runtime),
164        is_optional: Some(false),
165        is_pinned: Some(true),
166        is_direct: Some(true),
167        resolved_package: None,
168        extra_data: None,
169    })
170}
171
172fn extract_lockfile_requirement(value: &JsonValue) -> Option<String> {
173    match value {
174        JsonValue::String(spec) => Some(spec.to_string()),
175        JsonValue::Object(map) => map
176            .get(FIELD_VERSION)
177            .and_then(|version| version.as_str())
178            .map(|version| version.to_string()),
179        _ => None,
180    }
181}
182
183fn extract_lockfile_hashes(value: &JsonValue) -> Vec<String> {
184    let mut hashes = Vec::new();
185    let hash_values = value
186        .get(FIELD_HASHES)
187        .and_then(|hashes_value| hashes_value.as_array());
188
189    if let Some(hash_values) = hash_values {
190        for hash_value in hash_values {
191            if let Some(hash) = hash_value.as_str()
192                && let Some(stripped) = hash.strip_prefix("sha256:")
193            {
194                hashes.push(stripped.to_string());
195            }
196        }
197    }
198
199    hashes
200}
201
/// Extracts the version from an exact-pin requirement such as `==1.2.3`.
///
/// Surrounding whitespace is ignored, both around the whole requirement and
/// around the version. Returns `None` when the requirement does not start with
/// `==` or when nothing follows the operator.
fn strip_pipfile_lock_version(requirement: &str) -> Option<String> {
    requirement
        .trim()
        .strip_prefix("==")
        .map(str::trim)
        .filter(|pinned| !pinned.is_empty())
        .map(str::to_owned)
}
215
216fn extract_from_pipfile(path: &Path) -> PackageData {
217    let toml_content = match read_toml_file(path) {
218        Ok(content) => content,
219        Err(e) => {
220            warn!("Failed to read Pipfile at {:?}: {}", path, e);
221            return default_package_data(Some(DatasourceId::Pipfile));
222        }
223    };
224
225    parse_pipfile(&toml_content)
226}
227
228fn parse_pipfile(toml_content: &TomlValue) -> PackageData {
229    let mut package_data = default_package_data(Some(DatasourceId::Pipfile));
230
231    let packages = toml_content
232        .get(FIELD_PACKAGES)
233        .and_then(|value| value.as_table());
234    let dev_packages = toml_content
235        .get(FIELD_DEV_PACKAGES)
236        .and_then(|value| value.as_table());
237
238    let mut dependencies = Vec::new();
239    if let Some(packages) = packages {
240        dependencies.extend(extract_pipfile_dependencies(packages, "install", true));
241    }
242    if let Some(dev_packages) = dev_packages {
243        dependencies.extend(extract_pipfile_dependencies(dev_packages, "develop", false));
244    }
245
246    package_data.dependencies = dependencies;
247    package_data.extra_data = build_pipfile_extra_data(toml_content);
248
249    package_data
250}
251
252fn extract_pipfile_dependencies(
253    packages: &TomlMap<String, TomlValue>,
254    scope: &str,
255    is_runtime: bool,
256) -> Vec<Dependency> {
257    let mut dependencies = Vec::new();
258
259    for (name, value) in packages {
260        if let Some(dependency) = build_pipfile_dependency(name, value, scope, is_runtime) {
261            dependencies.push(dependency);
262        }
263    }
264
265    dependencies
266}
267
268fn build_pipfile_dependency(
269    name: &str,
270    value: &TomlValue,
271    scope: &str,
272    is_runtime: bool,
273) -> Option<Dependency> {
274    let normalized_name = normalize_pypi_name(name);
275    let requirement = extract_pipfile_requirement(value);
276    if requirement.is_none() && is_non_registry_dependency(value) {
277        return None;
278    }
279    let requirement = requirement?;
280    let purl = create_pypi_purl(&normalized_name, None);
281
282    Some(Dependency {
283        purl,
284        extracted_requirement: Some(requirement),
285        scope: Some(scope.to_string()),
286        is_runtime: Some(is_runtime),
287        is_optional: Some(false),
288        is_pinned: Some(false),
289        is_direct: Some(true),
290        resolved_package: None,
291        extra_data: None,
292    })
293}
294
295fn extract_pipfile_requirement(value: &TomlValue) -> Option<String> {
296    match value {
297        TomlValue::String(spec) => Some(spec.to_string()),
298        TomlValue::Boolean(true) => Some("*".to_string()),
299        TomlValue::Table(table) => table
300            .get(FIELD_VERSION)
301            .and_then(|version| version.as_str())
302            .map(|version| version.to_string()),
303        _ => None,
304    }
305}
306
307fn is_non_registry_dependency(value: &TomlValue) -> bool {
308    let table = match value {
309        TomlValue::Table(table) => table,
310        _ => return false,
311    };
312
313    ["git", "path", "file", "url", "hg", "svn"]
314        .iter()
315        .any(|key| table.contains_key(*key))
316}
317
318fn build_pipfile_extra_data(
319    toml_content: &TomlValue,
320) -> Option<HashMap<String, serde_json::Value>> {
321    let mut extra_data = HashMap::new();
322
323    if let Some(requires_table) = toml_content
324        .get(FIELD_REQUIRES)
325        .and_then(|value| value.as_table())
326        && let Some(python_version) = requires_table
327            .get(FIELD_PYTHON_VERSION)
328            .and_then(|value| value.as_str())
329    {
330        extra_data.insert(
331            FIELD_PYTHON_VERSION.to_string(),
332            serde_json::Value::String(python_version.to_string()),
333        );
334    }
335
336    if let Some(source_value) = toml_content.get(FIELD_SOURCE)
337        && let Some(sources) = parse_pipfile_sources(source_value)
338    {
339        extra_data.insert("sources".to_string(), sources);
340    }
341
342    if extra_data.is_empty() {
343        None
344    } else {
345        Some(extra_data)
346    }
347}
348
349fn parse_pipfile_sources(source_value: &TomlValue) -> Option<serde_json::Value> {
350    match source_value {
351        TomlValue::Array(sources) => {
352            let mut json_sources = Vec::new();
353            for source in sources {
354                if let Some(table) = source.as_table() {
355                    let mut json_map = serde_json::Map::new();
356                    if let Some(name) = table.get("name").and_then(|value| value.as_str()) {
357                        json_map.insert(
358                            "name".to_string(),
359                            serde_json::Value::String(name.to_string()),
360                        );
361                    }
362                    if let Some(url) = table.get("url").and_then(|value| value.as_str()) {
363                        json_map.insert(
364                            "url".to_string(),
365                            serde_json::Value::String(url.to_string()),
366                        );
367                    }
368                    if let Some(verify_ssl) =
369                        table.get("verify_ssl").and_then(|value| value.as_bool())
370                    {
371                        json_map.insert(
372                            "verify_ssl".to_string(),
373                            serde_json::Value::Bool(verify_ssl),
374                        );
375                    }
376                    json_sources.push(serde_json::Value::Object(json_map));
377                }
378            }
379
380            Some(serde_json::Value::Array(json_sources))
381        }
382        TomlValue::Table(table) => {
383            let mut json_map = serde_json::Map::new();
384            for (key, value) in table {
385                match value {
386                    TomlValue::String(value) => {
387                        json_map.insert(
388                            key.to_string(),
389                            serde_json::Value::String(value.to_string()),
390                        );
391                    }
392                    TomlValue::Boolean(value) => {
393                        json_map.insert(key.to_string(), serde_json::Value::Bool(*value));
394                    }
395                    _ => {}
396                }
397            }
398            Some(serde_json::Value::Object(json_map))
399        }
400        _ => None,
401    }
402}
403
/// Normalizes a PyPI distribution name for use in a package URL.
///
/// The name is trimmed, ASCII-lowercased, and every underscore is replaced by
/// a dash, as the purl specification requires for the `pypi` type (PyPI treats
/// `-` and `_` as the same character and ignores case). Without the underscore
/// replacement, `Flask_SQLAlchemy` and `flask-sqlalchemy` would produce
/// different purls for the same package.
fn normalize_pypi_name(name: &str) -> String {
    name.trim().to_ascii_lowercase().replace('_', "-")
}
407
408fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
409    let mut purl = PackageUrl::new(PipfileLockParser::PACKAGE_TYPE.as_str(), name).ok()?;
410    if let Some(version) = version
411        && purl.with_version(version).is_err()
412    {
413        return None;
414    }
415
416    Some(purl.to_string())
417}
418
419fn default_package_data(datasource_id: Option<DatasourceId>) -> PackageData {
420    PackageData {
421        package_type: Some(PipfileLockParser::PACKAGE_TYPE),
422        primary_language: Some("Python".to_string()),
423        datasource_id,
424        ..Default::default()
425    }
426}
427
// Registers this parser through the crate-level `register_parser!` macro
// (defined elsewhere in the crate).
// NOTE(review): the arguments look like a human-readable description, the path
// globs handled, the package type label, the primary language, and a
// documentation URL — confirm against the macro definition.
crate::register_parser!(
    "Pipenv lockfile and manifest",
    &["**/Pipfile.lock", "**/Pipfile"],
    "pypi",
    "Python",
    Some("https://github.com/pypa/pipfile"),
);