Skip to main content

provenant/parsers/
uv_lock.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4use std::collections::{HashMap, HashSet, VecDeque};
5use std::path::Path;
6
7use crate::parser_warn as warn;
8use packageurl::PackageUrl;
9use serde_json::Value as JsonValue;
10use toml::Value as TomlValue;
11use toml::map::Map as TomlMap;
12
13use crate::models::{
14    DatasourceId, Dependency, PackageData, PackageType, ResolvedPackage, Sha256Digest,
15};
16use crate::parsers::python::read_toml_file;
17use crate::parsers::utils::{MAX_ITERATION_COUNT, RecursionGuard, truncate_field};
18
19use super::PackageParser;
20
// Key of the top-level array of package tables in `uv.lock`.
const FIELD_PACKAGE: &str = "package";
// Keys read from each package table (and from individual dependency entries).
const FIELD_NAME: &str = "name";
// Read both per package (as a string) and at the top level (as an integer lockfile version).
const FIELD_VERSION: &str = "version";
const FIELD_SOURCE: &str = "source";
// Per-package dependency sections: a plain array plus two tables keyed by group name.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "dev-dependencies";
// Keys of the package `metadata` table that carry the declared requirement specifiers.
const FIELD_METADATA: &str = "metadata";
const FIELD_REQUIRES_DIST: &str = "requires-dist";
const FIELD_REQUIRES_DEV: &str = "requires-dev";
// Same key string as FIELD_OPTIONAL_DEPENDENCIES, but looked up inside `metadata`.
const FIELD_METADATA_OPTIONAL_DEPENDENCIES: &str = "optional-dependencies";
// Keys read from individual dependency / requirement entries.
const FIELD_MARKER: &str = "marker";
const FIELD_EXTRA: &str = "extra";
const FIELD_SPECIFIER: &str = "specifier";
// Top-level lockfile keys copied into the package-level `extra_data`.
const FIELD_REVISION: &str = "revision";
const FIELD_REQUIRES_PYTHON: &str = "requires-python";
const FIELD_RESOLUTION_MARKERS: &str = "resolution-markers";
const FIELD_MANIFEST: &str = "manifest";
39
/// Parser for `uv.lock` files; the behavior lives in its `PackageParser` implementation.
pub struct UvLockParser;
41
/// Merged view of everything the root package declares about one direct dependency.
///
/// Built from one or more [`DependencyEdge`]s that share the same name; see
/// `merge_direct_dependency_info` for how conflicting values are combined.
#[derive(Clone, Debug, Default)]
struct DirectDependencyInfo {
    /// Requirement specifier taken from the root metadata or the entry's `specifier` key.
    extracted_requirement: Option<String>,
    /// Dependency group name; `None` for the plain `dependencies` array or after a conflict.
    scope: Option<String>,
    /// `true` when at least one contributing edge came from the `dependencies` array.
    is_runtime: bool,
    /// `true` only while every contributing edge came from `optional-dependencies`.
    is_optional: bool,
    /// Marker / extra / source values captured from the dependency entries.
    extra_data: Option<HashMap<String, JsonValue>>,
    /// `"<kind>:<value>"` key of the entry's source table; cleared when edges disagree.
    source_key: Option<String>,
}
51
/// One dependency entry read from a package table's dependency sections.
#[derive(Clone, Debug)]
struct DependencyEdge {
    /// Normalized name of the package being depended on.
    name: String,
    /// Requirement specifier from a supplied requirement map or the entry's `specifier` key.
    extracted_requirement: Option<String>,
    /// Group name for optional / dev groups; `None` for the plain `dependencies` array.
    scope: Option<String>,
    /// Set by the caller: `true` for entries of the `dependencies` array.
    is_runtime: bool,
    /// Set by the caller: `true` for entries of an `optional-dependencies` group.
    is_optional: bool,
    /// `"<kind>:<value>"` key derived from the entry's `source` table, when present.
    source_key: Option<String>,
    /// Marker / extra / source values copied from the entry; `None` when none were present.
    extra_data: Option<HashMap<String, JsonValue>>,
}
62
63impl PackageParser for UvLockParser {
64    const PACKAGE_TYPE: PackageType = PackageType::Pypi;
65
66    fn is_match(path: &Path) -> bool {
67        path.file_name()
68            .and_then(|name| name.to_str())
69            .is_some_and(|name| name == "uv.lock")
70    }
71
72    fn extract_packages(path: &Path) -> Vec<PackageData> {
73        let toml_content = match read_toml_file(path) {
74            Ok(content) => content,
75            Err(e) => {
76                warn!("Failed to read uv.lock at {:?}: {}", path, e);
77                return vec![default_package_data()];
78            }
79        };
80
81        vec![parse_uv_lock(&toml_content)]
82    }
83}
84
85fn parse_uv_lock(toml_content: &TomlValue) -> PackageData {
86    let packages = toml_content
87        .get(FIELD_PACKAGE)
88        .and_then(TomlValue::as_array)
89        .cloned()
90        .unwrap_or_default();
91
92    if packages.is_empty() {
93        return default_package_data();
94    }
95
96    let package_tables: Vec<&TomlMap<String, TomlValue>> = packages
97        .iter()
98        .take(MAX_ITERATION_COUNT)
99        .filter_map(TomlValue::as_table)
100        .collect();
101
102    if package_tables.is_empty() {
103        return default_package_data();
104    }
105
106    let root_index = find_root_package_index(&package_tables);
107    let package_lookup = build_package_lookup(&package_tables);
108
109    let direct_infos = root_index
110        .and_then(|index| package_tables.get(index).copied())
111        .map(collect_root_direct_dependencies)
112        .unwrap_or_default();
113
114    let runtime_roots: Vec<(String, Option<String>)> = direct_infos
115        .iter()
116        .filter(|(_, info)| info.is_runtime)
117        .map(|(name, info)| (name.clone(), info.source_key.clone()))
118        .collect();
119    let dev_roots: Vec<(String, Option<String>)> = direct_infos
120        .iter()
121        .filter(|(_, info)| !info.is_runtime && !info.is_optional)
122        .map(|(name, info)| (name.clone(), info.source_key.clone()))
123        .collect();
124    let optional_roots: Vec<(String, Option<String>)> = direct_infos
125        .iter()
126        .filter(|(_, info)| info.is_optional)
127        .map(|(name, info)| (name.clone(), info.source_key.clone()))
128        .collect();
129
130    let runtime_reachable =
131        collect_reachable_packages(&package_tables, &package_lookup, &runtime_roots, false);
132    let dev_reachable =
133        collect_reachable_packages(&package_tables, &package_lookup, &dev_roots, true);
134    let optional_reachable =
135        collect_reachable_packages(&package_tables, &package_lookup, &optional_roots, true);
136
137    let mut package_data = default_package_data();
138    package_data.extra_data = build_lock_extra_data(toml_content);
139
140    if let Some(index) = root_index
141        && let Some(root_table) = package_tables.get(index)
142    {
143        package_data.name = root_table
144            .get(FIELD_NAME)
145            .and_then(TomlValue::as_str)
146            .map(normalize_pypi_name);
147        package_data.version = root_table
148            .get(FIELD_VERSION)
149            .and_then(TomlValue::as_str)
150            .map(|value| truncate_field(value.to_string()));
151        package_data.is_virtual =
152            package_source_table(root_table).is_some_and(|source| source.contains_key("virtual"));
153        package_data.purl = package_data
154            .name
155            .as_deref()
156            .and_then(|name| create_pypi_purl(name, package_data.version.as_deref()));
157    }
158
159    package_data.dependencies = package_tables
160        .iter()
161        .enumerate()
162        .filter(|(index, _)| Some(*index) != root_index)
163        .filter_map(|(_, package_table)| {
164            build_top_level_dependency(
165                package_table,
166                root_index.is_none(),
167                &direct_infos,
168                &runtime_reachable,
169                &dev_reachable,
170                &optional_reachable,
171                &package_lookup,
172            )
173        })
174        .collect();
175
176    package_data
177}
178
179fn build_top_level_dependency(
180    package_table: &TomlMap<String, TomlValue>,
181    no_root_package: bool,
182    direct_infos: &HashMap<String, DirectDependencyInfo>,
183    runtime_reachable: &HashSet<String>,
184    dev_reachable: &HashSet<String>,
185    optional_reachable: &HashSet<String>,
186    package_lookup: &HashMap<String, Vec<usize>>,
187) -> Option<Dependency> {
188    let name = package_table
189        .get(FIELD_NAME)
190        .and_then(TomlValue::as_str)
191        .map(normalize_pypi_name)?;
192    let version = package_table
193        .get(FIELD_VERSION)
194        .and_then(TomlValue::as_str)
195        .map(|value| truncate_field(value.to_string()))?;
196
197    let direct_info = direct_infos.get(&name);
198    let is_direct = direct_info.is_some();
199    let is_runtime = if no_root_package {
200        true
201    } else if let Some(info) = direct_info {
202        info.is_runtime
203    } else if runtime_reachable.contains(&name) {
204        true
205    } else {
206        !dev_reachable.contains(&name) && !optional_reachable.contains(&name)
207    };
208    let is_optional = direct_info.is_some_and(|info| info.is_optional)
209        || (!is_direct && optional_reachable.contains(&name) && !runtime_reachable.contains(&name));
210
211    Some(Dependency {
212        purl: create_pypi_purl(&name, Some(&version)).map(truncate_field),
213        extracted_requirement: direct_info
214            .and_then(|info| info.extracted_requirement.clone())
215            .map(truncate_field),
216        scope: direct_info
217            .and_then(|info| info.scope.clone())
218            .map(truncate_field),
219        is_runtime: Some(is_runtime),
220        is_optional: Some(is_optional),
221        is_pinned: Some(true),
222        is_direct: Some(is_direct),
223        resolved_package: Some(Box::new(build_resolved_package(
224            package_table,
225            package_lookup,
226        ))),
227        extra_data: direct_info.and_then(|info| info.extra_data.clone()),
228    })
229}
230
231fn build_resolved_package(
232    package_table: &TomlMap<String, TomlValue>,
233    package_lookup: &HashMap<String, Vec<usize>>,
234) -> ResolvedPackage {
235    let name = package_table
236        .get(FIELD_NAME)
237        .and_then(TomlValue::as_str)
238        .map(normalize_pypi_name)
239        .unwrap_or_default();
240    let version = package_table
241        .get(FIELD_VERSION)
242        .and_then(TomlValue::as_str)
243        .map(|value| truncate_field(value.to_string()))
244        .unwrap_or_default();
245
246    let (_, repository_download_url, api_data_url, purl) =
247        build_pypi_urls(Some(&name), Some(&version));
248    let repository_homepage_url =
249        Some(truncate_field(format!("https://pypi.org/project/{}", name)));
250    let (download_url, sha256) = extract_artifact_metadata(package_table);
251
252    let download_url = download_url.map(truncate_field);
253
254    ResolvedPackage {
255        primary_language: Some("Python".to_string()),
256        download_url,
257        sha1: None,
258        sha256: sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
259        sha512: None,
260        md5: None,
261        is_virtual: true,
262        extra_data: build_package_extra_data(package_table),
263        dependencies: collect_package_dependency_edges(package_table)
264            .into_iter()
265            .map(|edge| edge_to_dependency(edge, package_lookup))
266            .collect(),
267        repository_homepage_url,
268        repository_download_url: repository_download_url.map(truncate_field),
269        api_data_url: api_data_url.map(truncate_field),
270        datasource_id: Some(DatasourceId::PypiUvLock),
271        purl: purl.map(truncate_field),
272        ..ResolvedPackage::new(UvLockParser::PACKAGE_TYPE, String::new(), name, version)
273    }
274}
275
276fn edge_to_dependency(
277    edge: DependencyEdge,
278    package_lookup: &HashMap<String, Vec<usize>>,
279) -> Dependency {
280    let is_pinned = edge
281        .source_key
282        .as_ref()
283        .map(|_| !package_lookup.contains_key(&edge.name))
284        .unwrap_or(false);
285
286    Dependency {
287        purl: create_pypi_purl(&edge.name, None).map(truncate_field),
288        extracted_requirement: edge.extracted_requirement.map(truncate_field),
289        scope: edge.scope.map(truncate_field),
290        is_runtime: Some(edge.is_runtime),
291        is_optional: Some(edge.is_optional),
292        is_pinned: Some(is_pinned),
293        is_direct: Some(true),
294        resolved_package: None,
295        extra_data: edge.extra_data,
296    }
297}
298
299fn collect_root_direct_dependencies(
300    root_table: &TomlMap<String, TomlValue>,
301) -> HashMap<String, DirectDependencyInfo> {
302    let mut infos = HashMap::new();
303    let metadata = root_table.get(FIELD_METADATA).and_then(TomlValue::as_table);
304    let runtime_requirements = metadata
305        .and_then(|metadata| metadata.get(FIELD_REQUIRES_DIST))
306        .map(parse_requirement_metadata_array)
307        .unwrap_or_default();
308    let dev_requirements = metadata
309        .and_then(|metadata| metadata.get(FIELD_REQUIRES_DEV))
310        .and_then(TomlValue::as_table)
311        .map(parse_requirement_metadata_table)
312        .unwrap_or_default();
313    let optional_requirements = metadata
314        .and_then(|metadata| metadata.get(FIELD_METADATA_OPTIONAL_DEPENDENCIES))
315        .and_then(TomlValue::as_table)
316        .map(parse_requirement_metadata_table)
317        .unwrap_or_default();
318
319    for edge in collect_dependency_edges_from_array(
320        root_table
321            .get(FIELD_DEPENDENCIES)
322            .and_then(TomlValue::as_array),
323        None,
324        true,
325        false,
326        runtime_requirements.get("__runtime__"),
327    ) {
328        merge_direct_dependency_info(&mut infos, edge);
329    }
330
331    if let Some(optional_table) = root_table
332        .get(FIELD_OPTIONAL_DEPENDENCIES)
333        .and_then(TomlValue::as_table)
334    {
335        for (group, value) in optional_table.iter().take(MAX_ITERATION_COUNT) {
336            let requirement_map = optional_requirements.get(group);
337            for edge in collect_dependency_edges_from_array(
338                value.as_array(),
339                Some(group.to_string()),
340                false,
341                true,
342                requirement_map,
343            )
344            .into_iter()
345            .take(MAX_ITERATION_COUNT)
346            {
347                merge_direct_dependency_info(&mut infos, edge);
348            }
349        }
350    }
351
352    if let Some(dev_table) = root_table
353        .get(FIELD_DEV_DEPENDENCIES)
354        .and_then(TomlValue::as_table)
355    {
356        for (group, value) in dev_table.iter().take(MAX_ITERATION_COUNT) {
357            let requirement_map = dev_requirements.get(group);
358            for edge in collect_dependency_edges_from_array(
359                value.as_array(),
360                Some(group.to_string()),
361                false,
362                false,
363                requirement_map,
364            )
365            .into_iter()
366            .take(MAX_ITERATION_COUNT)
367            {
368                merge_direct_dependency_info(&mut infos, edge);
369            }
370        }
371    }
372
373    infos
374}
375
376fn merge_direct_dependency_info(
377    infos: &mut HashMap<String, DirectDependencyInfo>,
378    edge: DependencyEdge,
379) {
380    let name = edge.name.clone();
381    let new_info = direct_info_from_edge(edge);
382
383    if let Some(existing) = infos.get_mut(&name) {
384        existing.is_runtime |= new_info.is_runtime;
385        existing.is_optional &= new_info.is_optional;
386
387        if existing.extracted_requirement.is_none() {
388            existing.extracted_requirement = new_info.extracted_requirement.clone();
389        }
390
391        existing.scope = merge_scope(existing.scope.as_ref(), new_info.scope.as_ref());
392        existing.extra_data =
393            merge_optional_json_maps(existing.extra_data.take(), new_info.extra_data);
394
395        if existing.source_key != new_info.source_key {
396            existing.source_key = None;
397        }
398    } else {
399        infos.insert(name, new_info);
400    }
401}
402
/// Returns the shared scope when both sides are present and equal, otherwise `None`.
fn merge_scope(current: Option<&String>, new: Option<&String>) -> Option<String> {
    let (left, right) = (current?, new?);
    (left == right).then(|| left.clone())
}
411
412fn merge_optional_json_maps(
413    current: Option<HashMap<String, JsonValue>>,
414    new: Option<HashMap<String, JsonValue>>,
415) -> Option<HashMap<String, JsonValue>> {
416    match (current, new) {
417        (None, None) => None,
418        (Some(map), None) | (None, Some(map)) => Some(map),
419        (Some(mut current), Some(new)) => {
420            for (key, value) in new {
421                current.entry(key).or_insert(value);
422            }
423            Some(current)
424        }
425    }
426}
427
428fn direct_info_from_edge(edge: DependencyEdge) -> DirectDependencyInfo {
429    DirectDependencyInfo {
430        extracted_requirement: edge.extracted_requirement,
431        scope: edge.scope,
432        is_runtime: edge.is_runtime,
433        is_optional: edge.is_optional,
434        extra_data: edge.extra_data,
435        source_key: edge.source_key,
436    }
437}
438
439fn collect_package_dependency_edges(
440    package_table: &TomlMap<String, TomlValue>,
441) -> Vec<DependencyEdge> {
442    let mut edges = Vec::new();
443
444    edges.extend(collect_dependency_edges_from_array(
445        package_table
446            .get(FIELD_DEPENDENCIES)
447            .and_then(TomlValue::as_array),
448        None,
449        true,
450        false,
451        None,
452    ));
453
454    if let Some(optional_table) = package_table
455        .get(FIELD_OPTIONAL_DEPENDENCIES)
456        .and_then(TomlValue::as_table)
457    {
458        for (group, value) in optional_table.iter().take(MAX_ITERATION_COUNT) {
459            edges.extend(
460                collect_dependency_edges_from_array(
461                    value.as_array(),
462                    Some(group.to_string()),
463                    false,
464                    true,
465                    None,
466                )
467                .into_iter()
468                .take(MAX_ITERATION_COUNT),
469            );
470        }
471    }
472
473    if let Some(dev_table) = package_table
474        .get(FIELD_DEV_DEPENDENCIES)
475        .and_then(TomlValue::as_table)
476    {
477        for (group, value) in dev_table.iter().take(MAX_ITERATION_COUNT) {
478            edges.extend(
479                collect_dependency_edges_from_array(
480                    value.as_array(),
481                    Some(group.to_string()),
482                    false,
483                    false,
484                    None,
485                )
486                .into_iter()
487                .take(MAX_ITERATION_COUNT),
488            );
489        }
490    }
491
492    edges
493}
494
495fn collect_dependency_edges_from_array(
496    values: Option<&Vec<TomlValue>>,
497    scope: Option<String>,
498    is_runtime: bool,
499    is_optional: bool,
500    requirement_map: Option<&HashMap<String, String>>,
501) -> Vec<DependencyEdge> {
502    values
503        .into_iter()
504        .flatten()
505        .filter_map(|value| {
506            build_dependency_edge(
507                value,
508                scope.clone(),
509                is_runtime,
510                is_optional,
511                requirement_map,
512            )
513        })
514        .collect()
515}
516
517fn build_dependency_edge(
518    value: &TomlValue,
519    scope: Option<String>,
520    is_runtime: bool,
521    is_optional: bool,
522    requirement_map: Option<&HashMap<String, String>>,
523) -> Option<DependencyEdge> {
524    let table = value.as_table()?;
525    let name = table
526        .get(FIELD_NAME)
527        .and_then(TomlValue::as_str)
528        .map(normalize_pypi_name)?;
529
530    let mut extra_data = HashMap::new();
531    if let Some(marker) = table.get(FIELD_MARKER).and_then(TomlValue::as_str) {
532        extra_data.insert(
533            FIELD_MARKER.to_string(),
534            JsonValue::String(marker.to_string()),
535        );
536    }
537    if let Some(extra_value) = table.get(FIELD_EXTRA) {
538        let json_value = toml_value_to_json(extra_value);
539        extra_data.insert(FIELD_EXTRA.to_string(), json_value);
540    }
541
542    let source_key = table
543        .get(FIELD_SOURCE)
544        .and_then(TomlValue::as_table)
545        .and_then(source_table_key);
546    if let Some(source) = table.get(FIELD_SOURCE) {
547        extra_data.insert(FIELD_SOURCE.to_string(), toml_value_to_json(source));
548    }
549
550    let extracted_requirement = requirement_map
551        .and_then(|map| map.get(&name).cloned().map(truncate_field))
552        .or_else(|| {
553            table
554                .get(FIELD_SPECIFIER)
555                .and_then(TomlValue::as_str)
556                .map(|value| truncate_field(value.to_string()))
557        });
558
559    Some(DependencyEdge {
560        name,
561        extracted_requirement,
562        scope,
563        is_runtime,
564        is_optional,
565        source_key,
566        extra_data: (!extra_data.is_empty()).then_some(extra_data),
567    })
568}
569
570fn parse_requirement_metadata_array(value: &TomlValue) -> HashMap<String, HashMap<String, String>> {
571    let mut grouped = HashMap::new();
572    let runtime = value
573        .as_array()
574        .map(|values| parse_requirement_entries(values))
575        .unwrap_or_default();
576    grouped.insert("__runtime__".to_string(), runtime);
577    grouped
578}
579
580fn parse_requirement_metadata_table(
581    table: &TomlMap<String, TomlValue>,
582) -> HashMap<String, HashMap<String, String>> {
583    table
584        .iter()
585        .map(|(group, value)| {
586            (
587                group.to_string(),
588                value
589                    .as_array()
590                    .map(|values| parse_requirement_entries(values))
591                    .unwrap_or_default(),
592            )
593        })
594        .collect()
595}
596
597fn parse_requirement_entries(values: &[TomlValue]) -> HashMap<String, String> {
598    values
599        .iter()
600        .take(MAX_ITERATION_COUNT)
601        .filter_map(|value| {
602            let table = value.as_table()?;
603            let name = table
604                .get(FIELD_NAME)
605                .and_then(TomlValue::as_str)
606                .map(normalize_pypi_name)?;
607            let specifier = table
608                .get(FIELD_SPECIFIER)
609                .and_then(TomlValue::as_str)
610                .map(|value| truncate_field(value.to_string()))?;
611            Some((name, specifier))
612        })
613        .collect()
614}
615
616fn collect_reachable_packages(
617    package_tables: &[&TomlMap<String, TomlValue>],
618    package_lookup: &HashMap<String, Vec<usize>>,
619    roots: &[(String, Option<String>)],
620    include_non_runtime_edges: bool,
621) -> HashSet<String> {
622    let mut visited = HashSet::new();
623    let mut queue: VecDeque<(String, Option<String>)> = roots.iter().cloned().collect();
624    let mut iterations: usize = 0;
625
626    while let Some((name, source_key)) = queue.pop_front() {
627        iterations += 1;
628        if iterations > MAX_ITERATION_COUNT {
629            warn!(
630                "collect_reachable_packages exceeded MAX_ITERATION_COUNT ({})",
631                MAX_ITERATION_COUNT
632            );
633            break;
634        }
635        let Some(index) =
636            match_package_index(package_tables, package_lookup, &name, source_key.as_deref())
637        else {
638            continue;
639        };
640
641        let Some(package_table) = package_tables.get(index) else {
642            continue;
643        };
644
645        let package_name = package_table
646            .get(FIELD_NAME)
647            .and_then(TomlValue::as_str)
648            .map(normalize_pypi_name)
649            .unwrap_or(name);
650
651        if !visited.insert(package_name.clone()) {
652            continue;
653        }
654
655        let edges = if include_non_runtime_edges {
656            collect_package_dependency_edges(package_table)
657        } else {
658            collect_dependency_edges_from_array(
659                package_table
660                    .get(FIELD_DEPENDENCIES)
661                    .and_then(TomlValue::as_array),
662                None,
663                true,
664                false,
665                None,
666            )
667        };
668
669        for edge in edges {
670            queue.push_back((edge.name, edge.source_key));
671        }
672    }
673
674    visited
675}
676
677fn build_package_lookup(
678    package_tables: &[&TomlMap<String, TomlValue>],
679) -> HashMap<String, Vec<usize>> {
680    let mut lookup: HashMap<String, Vec<usize>> = HashMap::new();
681    for (index, package_table) in package_tables.iter().enumerate() {
682        if let Some(name) = package_table
683            .get(FIELD_NAME)
684            .and_then(TomlValue::as_str)
685            .map(normalize_pypi_name)
686        {
687            lookup.entry(name).or_default().push(index);
688        }
689    }
690    lookup
691}
692
693fn match_package_index(
694    package_tables: &[&TomlMap<String, TomlValue>],
695    package_lookup: &HashMap<String, Vec<usize>>,
696    name: &str,
697    source_key: Option<&str>,
698) -> Option<usize> {
699    let candidates = package_lookup.get(name)?;
700    if candidates.len() == 1 {
701        return candidates.first().copied();
702    }
703
704    let source_key = source_key?;
705    candidates.iter().copied().find(|index| {
706        package_tables
707            .get(*index)
708            .and_then(|table| package_source_table(table))
709            .and_then(source_table_key)
710            .as_deref()
711            == Some(source_key)
712    })
713}
714
715fn find_root_package_index(package_tables: &[&TomlMap<String, TomlValue>]) -> Option<usize> {
716    if let Some(index) = package_tables.iter().position(|table| {
717        package_source_table(table)
718            .and_then(local_source_path)
719            .is_some_and(|path| path == ".")
720    }) {
721        return Some(index);
722    }
723
724    package_tables.iter().position(|table| {
725        package_source_table(table)
726            .is_some_and(|source| source.contains_key("editable") || source.contains_key("virtual"))
727    })
728}
729
730fn local_source_path(source_table: &TomlMap<String, TomlValue>) -> Option<&str> {
731    source_table
732        .get("virtual")
733        .and_then(TomlValue::as_str)
734        .or_else(|| source_table.get("editable").and_then(TomlValue::as_str))
735}
736
737fn build_lock_extra_data(toml_content: &TomlValue) -> Option<HashMap<String, JsonValue>> {
738    let mut extra_data = HashMap::new();
739
740    if let Some(version) = toml_content
741        .get(FIELD_VERSION)
742        .and_then(TomlValue::as_integer)
743    {
744        extra_data.insert(
745            "lockfile_version".to_string(),
746            JsonValue::String(version.to_string()),
747        );
748    }
749
750    if let Some(revision) = toml_content
751        .get(FIELD_REVISION)
752        .and_then(TomlValue::as_integer)
753    {
754        extra_data.insert(
755            FIELD_REVISION.to_string(),
756            JsonValue::String(revision.to_string()),
757        );
758    }
759
760    if let Some(requires_python) = toml_content
761        .get(FIELD_REQUIRES_PYTHON)
762        .and_then(TomlValue::as_str)
763    {
764        extra_data.insert(
765            "requires_python".to_string(),
766            JsonValue::String(requires_python.to_string()),
767        );
768    }
769
770    if let Some(markers) = toml_content.get(FIELD_RESOLUTION_MARKERS) {
771        extra_data.insert(
772            FIELD_RESOLUTION_MARKERS.to_string(),
773            toml_value_to_json(markers),
774        );
775    }
776
777    if let Some(manifest) = toml_content.get(FIELD_MANIFEST) {
778        extra_data.insert(FIELD_MANIFEST.to_string(), toml_value_to_json(manifest));
779    }
780
781    (!extra_data.is_empty()).then_some(extra_data)
782}
783
784fn build_package_extra_data(
785    package_table: &TomlMap<String, TomlValue>,
786) -> Option<HashMap<String, JsonValue>> {
787    let mut extra_data = HashMap::new();
788
789    if let Some(source) = package_table.get(FIELD_SOURCE) {
790        extra_data.insert(FIELD_SOURCE.to_string(), toml_value_to_json(source));
791    }
792
793    if let Some(metadata) = package_table.get(FIELD_METADATA) {
794        extra_data.insert(FIELD_METADATA.to_string(), toml_value_to_json(metadata));
795    }
796
797    (!extra_data.is_empty()).then_some(extra_data)
798}
799
800fn extract_artifact_metadata(
801    package_table: &TomlMap<String, TomlValue>,
802) -> (Option<String>, Option<String>) {
803    if let Some(sdist_table) = package_table.get("sdist").and_then(TomlValue::as_table) {
804        let download_url = sdist_table
805            .get("url")
806            .and_then(TomlValue::as_str)
807            .map(|value| truncate_field(value.to_string()));
808        let sha256 = sdist_table
809            .get("hash")
810            .and_then(TomlValue::as_str)
811            .and_then(strip_sha256_prefix);
812        if download_url.is_some() || sha256.is_some() {
813            return (download_url, sha256);
814        }
815    }
816
817    let wheel_table = package_table
818        .get("wheels")
819        .and_then(TomlValue::as_array)
820        .and_then(|wheels| wheels.first())
821        .and_then(TomlValue::as_table);
822
823    let download_url = wheel_table
824        .and_then(|table| table.get("url"))
825        .and_then(TomlValue::as_str)
826        .map(|value| truncate_field(value.to_string()));
827    let sha256 = wheel_table
828        .and_then(|table| table.get("hash"))
829        .and_then(TomlValue::as_str)
830        .and_then(strip_sha256_prefix);
831
832    (download_url, sha256)
833}
834
/// Returns the text after a leading `sha256:`, or `None` when that prefix is absent.
fn strip_sha256_prefix(value: &str) -> Option<String> {
    let digest = value.strip_prefix("sha256:")?;
    Some(digest.to_owned())
}
838
839fn package_source_table(
840    package_table: &TomlMap<String, TomlValue>,
841) -> Option<&TomlMap<String, TomlValue>> {
842    package_table
843        .get(FIELD_SOURCE)
844        .and_then(TomlValue::as_table)
845}
846
847fn source_table_key(source_table: &TomlMap<String, TomlValue>) -> Option<String> {
848    ["registry", "editable", "virtual", "git"]
849        .into_iter()
850        .find_map(|key| {
851            source_table
852                .get(key)
853                .and_then(TomlValue::as_str)
854                .map(|value| format!("{}:{}", key, value))
855        })
856}
857
858fn build_pypi_urls(
859    name: Option<&str>,
860    version: Option<&str>,
861) -> (
862    Option<String>,
863    Option<String>,
864    Option<String>,
865    Option<String>,
866) {
867    let repository_homepage_url =
868        name.map(|value| truncate_field(format!("https://pypi.org/project/{}", value)));
869
870    let repository_download_url = name.and_then(|value| {
871        version.map(|ver| {
872            truncate_field(format!(
873                "https://pypi.org/packages/source/{}/{}/{}-{}.tar.gz",
874                &value[..1.min(value.len())],
875                value,
876                value,
877                ver
878            ))
879        })
880    });
881
882    let api_data_url = name.map(|value| {
883        if let Some(ver) = version {
884            truncate_field(format!("https://pypi.org/pypi/{}/{}/json", value, ver))
885        } else {
886            truncate_field(format!("https://pypi.org/pypi/{}/json", value))
887        }
888    });
889
890    let purl = name.and_then(|value| create_pypi_purl(value, version));
891
892    (
893        repository_homepage_url,
894        repository_download_url,
895        api_data_url,
896        purl,
897    )
898}
899
900fn normalize_pypi_name(name: &str) -> String {
901    truncate_field(name.trim().to_ascii_lowercase())
902}
903
904fn create_pypi_purl(name: &str, version: Option<&str>) -> Option<String> {
905    if name.contains('[') || name.contains(']') {
906        return Some(truncate_field(build_manual_pypi_purl(name, version)));
907    }
908
909    if let Ok(mut purl) = PackageUrl::new(UvLockParser::PACKAGE_TYPE.as_str(), name) {
910        if let Some(version) = version
911            && purl.with_version(version).is_err()
912        {
913            return None;
914        }
915        return Some(truncate_field(purl.to_string()));
916    }
917
918    Some(truncate_field(build_manual_pypi_purl(name, version)))
919}
920
/// Formats a `pkg:pypi/` purl by hand, percent-encoding `[` and `]` in the name.
///
/// `@version` is appended only for a non-empty version.
fn build_manual_pypi_purl(name: &str, version: Option<&str>) -> String {
    let mut purl = String::from("pkg:pypi/");

    for ch in name.chars() {
        match ch {
            '[' => purl.push_str("%5b"),
            ']' => purl.push_str("%5d"),
            other => purl.push(other),
        }
    }

    match version {
        Some(version) if !version.is_empty() => {
            purl.push('@');
            purl.push_str(version);
        }
        _ => {}
    }

    purl
}
932
933fn toml_value_to_json(value: &TomlValue) -> JsonValue {
934    toml_value_to_json_recursive(value, &mut RecursionGuard::depth_only())
935}
936
/// Recursive worker behind `toml_value_to_json`.
///
/// Strings and booleans keep their JSON type; integers, floats and datetimes are emitted as
/// JSON *strings*; arrays and tables are converted element by element. When
/// `guard.descend()` reports `true`, a warning is logged and `JsonValue::Null` is returned
/// for that value.
fn toml_value_to_json_recursive(value: &TomlValue, guard: &mut RecursionGuard<()>) -> JsonValue {
    // NOTE(review): this early return does not call `guard.ascend()`. If `descend()`
    // increments the depth even when it reports the limit as exceeded, sibling values would
    // then be converted against an inflated depth — confirm against `RecursionGuard`.
    if guard.descend() {
        warn!("toml_value_to_json exceeded recursion depth limit");
        return JsonValue::Null;
    }

    let result = match value {
        TomlValue::String(value) => JsonValue::String(value.clone()),
        // Numbers and datetimes are stringified rather than mapped to JSON numbers.
        TomlValue::Integer(value) => JsonValue::String(value.to_string()),
        TomlValue::Float(value) => JsonValue::String(value.to_string()),
        TomlValue::Boolean(value) => JsonValue::Bool(*value),
        TomlValue::Datetime(value) => JsonValue::String(value.to_string()),
        TomlValue::Array(values) => JsonValue::Array(
            values
                .iter()
                .map(|v| toml_value_to_json_recursive(v, guard))
                .collect(),
        ),
        TomlValue::Table(values) => JsonValue::Object(
            values
                .iter()
                .map(|(key, value)| (key.clone(), toml_value_to_json_recursive(value, guard)))
                .collect(),
        ),
    };
    // Pairs with the successful `descend()` above.
    guard.ascend();
    result
}
965
966fn default_package_data() -> PackageData {
967    PackageData {
968        package_type: Some(UvLockParser::PACKAGE_TYPE),
969        primary_language: Some("Python".to_string()),
970        datasource_id: Some(DatasourceId::PypiUvLock),
971        ..Default::default()
972    }
973}
974
// Registration entry for this parser. The arguments appear to be, in order: a
// human-readable description, the path glob(s) it handles, the package type, the primary
// language, and an optional documentation URL — confirm against the `register_parser!`
// macro definition.
crate::register_parser!(
    "uv lockfile",
    &["**/uv.lock"],
    "pypi",
    "Python",
    Some("https://docs.astral.sh/uv/concepts/projects/layout/"),
);