// uv_types/hash.rs — hash-checking strategy (none / generate / verify / require).
1use std::fmt::Display;
2use std::str::FromStr;
3use std::sync::Arc;
4
5use rustc_hash::FxHashMap;
6
7use uv_configuration::HashCheckingMode;
8use uv_distribution_types::{
9    DistributionMetadata, HashGeneration, HashPolicy, Name, Requirement, RequirementSource,
10    Resolution, UnresolvedRequirement, VersionId,
11};
12use uv_normalize::PackageName;
13use uv_pep440::Version;
14use uv_pypi_types::{HashDigest, HashDigests, HashError, ResolverMarkerEnvironment};
15use uv_redacted::DisplaySafeUrl;
16
/// The overall policy governing how distribution hashes are generated and validated.
///
/// Constructed via [`HashStrategy::from_requirements`] or [`HashStrategy::from_resolution`],
/// and queried per-distribution via `get`, `get_package`, and `get_url`.
#[derive(Debug, Default, Clone)]
pub enum HashStrategy {
    /// No hash policy is specified.
    #[default]
    None,
    /// Hashes should be generated (specifically, a SHA-256 hash), but not validated.
    Generate(HashGeneration),
    /// Hashes should be validated, if present, but ignored if absent.
    ///
    /// If necessary, hashes should be generated to ensure that the archive is valid.
    ///
    /// The map is keyed by [`VersionId`]; the `Arc` makes cloning the strategy cheap.
    Verify(Arc<FxHashMap<VersionId, Vec<HashDigest>>>),
    /// Hashes should be validated against a pre-defined list of hashes.
    ///
    /// If necessary, hashes should be generated to ensure that the archive is valid.
    Require(Arc<FxHashMap<VersionId, Vec<HashDigest>>>),
}
33
impl HashStrategy {
    /// Return the [`HashPolicy`] for the given distribution.
    pub fn get<T: DistributionMetadata>(&self, distribution: &T) -> HashPolicy<'_> {
        match self {
            Self::None => HashPolicy::None,
            Self::Generate(mode) => HashPolicy::Generate(*mode),
            Self::Verify(hashes) => {
                let id = distribution.version_id();
                if let Some(hashes) = hashes.get(&id) {
                    hash_policy(&id, hashes.as_slice())
                } else {
                    // Verification is best-effort: distributions with no known hashes
                    // are not checked.
                    HashPolicy::None
                }
            }
            Self::Require(hashes) => {
                let id = distribution.version_id();
                // Unlike `Verify`, an unknown distribution yields a policy over an
                // empty digest list rather than `HashPolicy::None`.
                hash_policy(&id, hashes.get(&id).map(Vec::as_slice).unwrap_or_default())
            }
        }
    }

    /// Return the [`HashPolicy`] for the given registry-based package.
    ///
    /// Registry lookups always use [`HashPolicy::Any`] (see [`hash_policy`] for the
    /// `Any` vs. `All` distinction).
    pub fn get_package(&self, name: &PackageName, version: &Version) -> HashPolicy<'_> {
        let id = VersionId::from_registry(name.clone(), version.clone());
        match self {
            Self::None => HashPolicy::None,
            Self::Generate(mode) => HashPolicy::Generate(*mode),
            Self::Verify(hashes) => {
                if let Some(hashes) = hashes.get(&id) {
                    HashPolicy::Any(hashes.as_slice())
                } else {
                    HashPolicy::None
                }
            }
            Self::Require(hashes) => {
                HashPolicy::Any(hashes.get(&id).map(Vec::as_slice).unwrap_or_default())
            }
        }
    }

    /// Return the [`HashPolicy`] for the given direct URL package.
    ///
    /// A direct URL identifies a single concrete artifact, so every provided digest must match.
    pub fn get_url(&self, url: &DisplaySafeUrl) -> HashPolicy<'_> {
        let id = VersionId::from_url(url);
        match self {
            Self::None => HashPolicy::None,
            Self::Generate(mode) => HashPolicy::Generate(*mode),
            Self::Verify(hashes) => {
                if let Some(hashes) = hashes.get(&id) {
                    HashPolicy::All(hashes.as_slice())
                } else {
                    HashPolicy::None
                }
            }
            Self::Require(hashes) => {
                HashPolicy::All(hashes.get(&id).map(Vec::as_slice).unwrap_or_default())
            }
        }
    }

    /// Returns `true` if the given registry-based package is allowed.
    ///
    /// Only `Require` restricts the allowed set: a package is allowed iff a hash was
    /// registered for this exact name and version.
    pub fn allows_package(&self, name: &PackageName, version: &Version) -> bool {
        match self {
            Self::None => true,
            Self::Generate(_) => true,
            Self::Verify(_) => true,
            Self::Require(hashes) => {
                hashes.contains_key(&VersionId::from_registry(name.clone(), version.clone()))
            }
        }
    }

    /// Returns `true` if the given direct URL package is allowed.
    ///
    /// Mirrors [`HashStrategy::allows_package`]: only `Require` restricts the allowed set.
    pub fn allows_url(&self, url: &DisplaySafeUrl) -> bool {
        match self {
            Self::None => true,
            Self::Generate(_) => true,
            Self::Verify(_) => true,
            Self::Require(hashes) => hashes.contains_key(&VersionId::from_url(url)),
        }
    }

    /// Return a [`HashStrategy`] augmented with archive URL hashes discovered in additional
    /// requirements after the initial command-line parse.
    ///
    /// Copy-on-write: the underlying map is only cloned when [`Self::augment_hashes`]
    /// actually produced new or updated entries; otherwise the existing `Arc` is reused.
    pub fn augment_with_requirements<'a>(
        self,
        requirements: impl Iterator<Item = &'a Requirement>,
    ) -> Result<Self, HashStrategyError> {
        Ok(match self {
            Self::None => Self::None,
            Self::Generate(mode) => Self::Generate(mode),
            Self::Verify(existing) => {
                if let Some(hashes) = Self::augment_hashes(existing.as_ref(), requirements)? {
                    Self::Verify(Arc::new(hashes))
                } else {
                    Self::Verify(existing)
                }
            }
            Self::Require(existing) => {
                if let Some(hashes) = Self::augment_hashes(existing.as_ref(), requirements)? {
                    Self::Require(Arc::new(hashes))
                } else {
                    Self::Require(existing)
                }
            }
        })
    }

    /// Generate the required hashes from a set of [`UnresolvedRequirement`] entries.
    ///
    /// When the environment is not given, this treats all marker expressions
    /// that reference the environment as true. In other words, it does
    /// environment independent expression evaluation. (Which in turn devolves
    /// to "only evaluate marker expressions that reference an extra name.")
    ///
    /// Hashes may come from two places per entry: the explicit `digests` strings and the
    /// hash fragment on the requirement's URL; both are parsed and merged. Constraint
    /// hashes and requirement hashes for the same package are intersected, except for
    /// archive URLs, whose digest sets are merged instead.
    pub fn from_requirements<'a>(
        requirements: impl Iterator<Item = (&'a UnresolvedRequirement, &'a [String])>,
        constraints: impl Iterator<Item = (&'a Requirement, &'a [String])>,
        marker_env: Option<&ResolverMarkerEnvironment>,
        mode: HashCheckingMode,
    ) -> Result<Self, HashStrategyError> {
        let mut constraint_hashes = FxHashMap::<VersionId, Vec<HashDigest>>::default();

        // First, index the constraints by name.
        for (requirement, digests) in constraints {
            if !requirement
                .evaluate_markers(marker_env.map(ResolverMarkerEnvironment::markers), &[])
            {
                continue;
            }

            // Every constraint must be a pinned version.
            let Some(id) = Self::pin(requirement) else {
                if mode.is_require() {
                    return Err(HashStrategyError::UnpinnedRequirement(
                        requirement.to_string(),
                        mode,
                    ));
                }
                continue;
            };

            // Parse the hashes provided directly on the requirement, then merge in any hashes from
            // the URL fragment.
            let mut digests = digests
                .iter()
                .map(|digest| HashDigest::from_str(digest))
                .collect::<Result<Vec<_>, _>>()?;
            if let Some(fragment_hashes) = requirement.hashes().map(HashDigests::from) {
                merge_digests(&mut digests, fragment_hashes.iter(), requirement)?;
            }

            if digests.is_empty() {
                continue;
            }

            merge_hashes(&mut constraint_hashes, id, digests, requirement)?;
        }

        // For each requirement, map from hash identity to allowed hashes.
        let mut requirement_hashes = FxHashMap::<VersionId, Vec<HashDigest>>::default();
        for (requirement, digests) in requirements {
            if !requirement
                .evaluate_markers(marker_env.map(ResolverMarkerEnvironment::markers), &[])
            {
                continue;
            }

            // Every requirement must be either a pinned version or a direct URL.
            let id = match &requirement {
                UnresolvedRequirement::Named(requirement) => {
                    if let Some(id) = Self::pin(requirement) {
                        id
                    } else {
                        if mode.is_require() {
                            return Err(HashStrategyError::UnpinnedRequirement(
                                requirement.to_string(),
                                mode,
                            ));
                        }
                        continue;
                    }
                }
                UnresolvedRequirement::Unnamed(requirement) => {
                    // Direct URLs are always allowed.
                    VersionId::from_parsed_url(&requirement.url.parsed_url)
                }
            };

            // Parse the hashes provided directly on the requirement, then merge in any hashes from
            // the URL fragment.
            let mut digests = digests
                .iter()
                .map(|digest| HashDigest::from_str(digest))
                .collect::<Result<Vec<_>, _>>()?;
            if let Some(fragment_hashes) = requirement.hashes().map(HashDigests::from) {
                merge_digests(&mut digests, fragment_hashes.iter(), requirement)?;
            }

            // Reconcile with any constraint hashes for the same identity. Note the
            // `remove`: a constraint consumed here must not be re-applied in the final
            // chained merge below.
            let digests = if let Some(constraint) = constraint_hashes.remove(&id) {
                if digests.is_empty() {
                    // If there are _only_ hashes on the constraints, use them.
                    constraint
                } else if matches!(id, VersionId::ArchiveUrl { .. }) {
                    // Archive URLs accumulate digests (e.g., one per algorithm) rather
                    // than intersecting them.
                    let mut merged = digests;
                    merge_digests(&mut merged, &constraint, requirement)?;
                    merged
                } else {
                    // If there are constraint and requirement hashes, take the intersection.
                    let intersection: Vec<_> = digests
                        .into_iter()
                        .filter(|digest| constraint.contains(digest))
                        .collect();
                    if intersection.is_empty() {
                        return Err(HashStrategyError::NoIntersection(
                            requirement.to_string(),
                            mode,
                        ));
                    }
                    intersection
                }
            } else {
                digests
            };

            // Under `--require-hashes`, every requirement must include a hash.
            if digests.is_empty() {
                if mode.is_require() {
                    return Err(HashStrategyError::MissingHashes(
                        requirement.to_string(),
                        mode,
                    ));
                }
                continue;
            }

            merge_hashes(&mut requirement_hashes, id, digests, requirement)?;
        }

        // Merge the hashes, preferring requirements over constraints, since overlapping
        // requirements were already merged.
        let hashes: FxHashMap<VersionId, Vec<HashDigest>> = constraint_hashes
            .into_iter()
            .chain(requirement_hashes)
            .collect();
        match mode {
            HashCheckingMode::Verify => Ok(Self::Verify(Arc::new(hashes))),
            HashCheckingMode::Require => Ok(Self::Require(Arc::new(hashes))),
        }
    }

    /// Generate the required hashes from a [`Resolution`].
    ///
    /// Unlike [`Self::from_requirements`], no merging is needed: each distribution in a
    /// resolution appears once, so its digests are inserted directly.
    pub fn from_resolution(
        resolution: &Resolution,
        mode: HashCheckingMode,
    ) -> Result<Self, HashStrategyError> {
        let mut hashes = FxHashMap::<VersionId, Vec<HashDigest>>::default();

        for (dist, digests) in resolution.hashes() {
            if digests.is_empty() {
                // Under `--require-hashes`, every requirement must include a hash.
                if mode.is_require() {
                    return Err(HashStrategyError::MissingHashes(
                        dist.name().to_string(),
                        mode,
                    ));
                }
                continue;
            }
            hashes.insert(dist.version_id(), digests.to_vec());
        }

        match mode {
            HashCheckingMode::Verify => Ok(Self::Verify(Arc::new(hashes))),
            HashCheckingMode::Require => Ok(Self::Require(Arc::new(hashes))),
        }
    }

    /// Augment an existing set of hashes with archive URL hashes discovered in additional
    /// requirements.
    ///
    /// Archive URL requirements are keyed by a [`VersionId`] so that requirements that refer to
    /// the same underlying archive but differ only in hash fragments are merged onto the same
    /// digest set.
    ///
    /// Returns `Ok(None)` if no new hashes were added or updated.
    fn augment_hashes<'a>(
        existing: &FxHashMap<VersionId, Vec<HashDigest>>,
        requirements: impl Iterator<Item = &'a Requirement>,
    ) -> Result<Option<FxHashMap<VersionId, Vec<HashDigest>>>, HashStrategyError> {
        // Copy-on-write working map: stays `None` until a change is actually made.
        let mut hashes = None;

        for requirement in requirements {
            let Some((id, digests)) = Self::requirement_hashes(requirement) else {
                continue;
            };
            // Read through the working copy if one exists, else the original map.
            let current = hashes.as_ref().unwrap_or(existing);
            let current_digests = current.get(&id);
            let mut merged = current_digests.cloned().unwrap_or_default();
            merge_digests(&mut merged, &digests, requirement)?;

            // `merge_digests` sorts its output, so slice equality here means the merge
            // was a no-op; skip to avoid cloning the map unnecessarily.
            if current_digests.map(Vec::as_slice) == Some(merged.as_slice()) {
                continue;
            }

            hashes
                .get_or_insert_with(|| existing.clone())
                .insert(id, merged);
        }

        Ok(hashes)
    }

    /// Extract the archive URL hash target and digests for a requirement, if any.
    ///
    /// Digests are sorted so that sets compare equal regardless of input order.
    fn requirement_hashes(requirement: &Requirement) -> Option<(VersionId, Vec<HashDigest>)> {
        let mut digests = HashDigests::from(requirement.hashes()?).to_vec();
        if digests.is_empty() {
            return None;
        }
        digests.sort_unstable();
        let id = Self::pin(requirement)?;
        Some((id, digests))
    }

    /// Pin a [`Requirement`] to a [`VersionId`], if possible.
    ///
    /// Registry requirements pin only when they carry a single `==` specifier; URL, Git,
    /// path, and directory sources are already concrete and always pin.
    fn pin(requirement: &Requirement) -> Option<VersionId> {
        match &requirement.source {
            RequirementSource::Registry { specifier, .. } => {
                // Must be a single specifier.
                let [specifier] = specifier.as_ref() else {
                    return None;
                };

                // Must be pinned to a specific version.
                if *specifier.operator() != uv_pep440::Operator::Equal {
                    return None;
                }

                Some(VersionId::from_registry(
                    requirement.name.clone(),
                    specifier.version().clone(),
                ))
            }
            RequirementSource::Url {
                location,
                subdirectory,
                ..
            } => Some(VersionId::from_archive(location, subdirectory.as_deref())),
            RequirementSource::Git {
                git, subdirectory, ..
            } => Some(VersionId::from_git(git, subdirectory.as_deref())),
            RequirementSource::Path { install_path, .. } => {
                Some(VersionId::from_path(install_path))
            }
            RequirementSource::Directory { install_path, .. } => {
                Some(VersionId::from_directory(install_path))
            }
        }
    }
}
394
395fn hash_policy<'a>(id: &VersionId, digests: &'a [HashDigest]) -> HashPolicy<'a> {
396    match id {
397        VersionId::NameVersion { .. } => HashPolicy::Any(digests),
398        VersionId::ArchiveUrl { .. }
399        | VersionId::Git { .. }
400        | VersionId::Path { .. }
401        | VersionId::Directory { .. }
402        | VersionId::Unknown { .. } => HashPolicy::All(digests),
403    }
404}
405
406/// Merge repeated hashes for a requirement or constraint into the hash map.
407fn merge_hashes(
408    hashes: &mut FxHashMap<VersionId, Vec<HashDigest>>,
409    id: VersionId,
410    incoming: Vec<HashDigest>,
411    requirement: impl Display,
412) -> Result<(), HashStrategyError> {
413    if incoming.is_empty() {
414        return Ok(());
415    }
416
417    if !matches!(&id, VersionId::ArchiveUrl { .. }) {
418        hashes.insert(id, incoming);
419        return Ok(());
420    }
421
422    if let Some(existing) = hashes.get_mut(&id) {
423        return merge_digests(existing, &incoming, requirement);
424    }
425
426    let mut merged = Vec::new();
427    merge_digests(&mut merged, &incoming, requirement)?;
428    hashes.insert(id, merged);
429    Ok(())
430}
431
432/// Merge `incoming` digests into `existing`.
433///
434/// Exact duplicates are ignored. Digests for different algorithms are accumulated. If the
435/// same algorithm appears with two different values, returns
436/// [`HashStrategyError::ConflictingArchiveUrlHashes`].
437fn merge_digests<'a>(
438    existing: &mut Vec<HashDigest>,
439    incoming: impl IntoIterator<Item = &'a HashDigest>,
440    requirement: impl Display,
441) -> Result<(), HashStrategyError> {
442    for digest in incoming {
443        match existing
444            .iter()
445            .find(|candidate| candidate.algorithm == digest.algorithm)
446        {
447            Some(candidate) if candidate == digest => {}
448            Some(conflict) => {
449                return Err(HashStrategyError::ConflictingArchiveUrlHashes(
450                    requirement.to_string(),
451                    conflict.clone(),
452                    digest.clone(),
453                ));
454            }
455            None => existing.push(digest.clone()),
456        }
457    }
458    existing.sort_unstable();
459
460    Ok(())
461}
462
/// Errors raised while constructing or augmenting a [`HashStrategy`].
#[derive(thiserror::Error, Debug)]
pub enum HashStrategyError {
    /// A hash string failed to parse (propagated from [`HashDigest::from_str`]).
    #[error(transparent)]
    Hash(#[from] HashError),
    /// The same archive URL was given two different digests for the same algorithm.
    #[error("Conflicting archive URL hashes for `{0}`: `{1}` conflicts with `{2}`")]
    ConflictingArchiveUrlHashes(String, HashDigest, HashDigest),
    /// In require mode, a requirement or constraint was not pinned with a single `==`.
    #[error(
        "In `{1}` mode, all requirements must have their versions pinned with `==`, but found: {0}"
    )]
    UnpinnedRequirement(String, HashCheckingMode),
    /// In require mode, a requirement carried no hashes at all.
    #[error("In `{1}` mode, all requirements must have a hash, but none were provided for: {0}")]
    MissingHashes(String, HashCheckingMode),
    /// Requirement and constraint hashes for the same package had an empty intersection.
    #[error(
        "In `{1}` mode, all requirements must have a hash, but there were no overlapping hashes between the requirements and constraints for: {0}"
    )]
    NoIntersection(String, HashCheckingMode),
}
480
#[cfg(test)]
mod tests {
    use std::str::FromStr;
    use uv_configuration::HashCheckingMode;
    use uv_distribution_filename::DistExtension;
    use uv_distribution_types::{
        HashPolicy, Requirement, RequirementSource, UnresolvedRequirement,
    };
    use uv_pypi_types::HashDigest;

    use super::HashStrategy;

    /// Build a direct-URL requirement for the `anyio` wheel whose verbatim `url` may
    /// carry a `#sha256=...`-style hash fragment; `location` stays fragment-free.
    fn requirement(url: &str) -> Requirement {
        Requirement {
            name: "anyio".parse().unwrap(),
            extras: Box::default(),
            groups: Box::default(),
            marker: "python_version >= '3.8'".parse().unwrap(),
            source: RequirementSource::Url {
                location: "https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl"
                    .parse()
                    .unwrap(),
                subdirectory: None,
                ext: DistExtension::Wheel,
                url: url.parse().unwrap(),
            },
            origin: None,
        }
    }

    /// Two requirements for the same archive, each carrying a different hash algorithm
    /// in its URL fragment, should end up sharing one merged (and sorted) digest set.
    #[test]
    fn from_requirements_merges_direct_url_hashes_across_fragments() {
        // Same wheel URL, `sha256` fragment.
        let first = UnresolvedRequirement::Named(requirement(
            "https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl#sha256=cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f",
        ));
        // Same wheel URL, `sha512` fragment.
        let second = UnresolvedRequirement::Named(requirement(
            "https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl#sha512=f30761c1e8725b49c498273b90dba4b05c0fd157811994c806183062cb6647e773364ce45f0e1ff0b10e32fe6d0232ea5ad39476ccf37109d6b49603a09c11c2",
        ));

        let hasher = HashStrategy::from_requirements(
            [(&first, &[][..]), (&second, &[][..])].into_iter(),
            std::iter::empty(),
            None,
            HashCheckingMode::Require,
        )
        .unwrap();

        // `merge_digests` sorts, so sort the expected set the same way before comparing.
        let mut expected = vec![
            HashDigest::from_str(
                "sha256:cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f",
            )
            .unwrap(),
            HashDigest::from_str(
                "sha512:f30761c1e8725b49c498273b90dba4b05c0fd157811994c806183062cb6647e773364ce45f0e1ff0b10e32fe6d0232ea5ad39476ccf37109d6b49603a09c11c2",
            )
            .unwrap(),
        ];
        expected.sort_unstable();

        // Both verbatim URLs (regardless of fragment) must resolve to the merged set.
        for requirement in [&first, &second] {
            let UnresolvedRequirement::Named(requirement) = requirement else {
                panic!("expected named requirement");
            };
            let RequirementSource::Url { url, .. } = &requirement.source else {
                panic!("expected direct URL requirement");
            };
            assert_eq!(hasher.get_url(url), HashPolicy::All(expected.as_slice()));
        }
    }
}
550}