1use futures::executor::block_on;
7use futures::io::AllowStdIo;
8use std::io;
9use std::path::Path;
10use thiserror::Error;
11use tokio::io::AsyncReadExt;
12use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
13use uv_distribution_filename::WheelFilename;
14use uv_normalize::{DistInfoName, InvalidNameError};
15use uv_pypi_types::ResolutionMetadata;
16
/// Errors produced while locating or reading wheel `METADATA`.
///
/// The messages do not include the location of the wheel itself; only
/// [`Error::InvalidMetadata`] and [`Error::BadCrc32`] carry a path string.
#[derive(Debug, Error)]
pub enum Error {
    /// Generic failure to read `dist-info` metadata from a built wheel.
    #[error("Failed to read `dist-info` metadata from built wheel")]
    DistInfo,
    /// No `<prefix>.dist-info` directory (or `METADATA` entry inside one) was found.
    #[error("No .dist-info directory found")]
    MissingDistInfo,
    /// More than one `.dist-info` candidate was found; the payload is the
    /// already-formatted, comma-separated list of candidates.
    #[error("Multiple .dist-info directories found: {0}")]
    MultipleDistInfo(String),
    /// The `.dist-info` directory name is not of the form `<name>-<version>`;
    /// the payload is the offending name.
    #[error(
        "The .dist-info directory does not consist of the normalized package name and version: `{0}`"
    )]
    MissingDistInfoSegments(String),
    /// The `.dist-info` directory prefix (field 0) does not start with the
    /// package name taken from the wheel filename (field 1).
    #[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
    MissingDistInfoPackageName(String, String),
    /// A name could not be normalized; converted automatically from
    /// [`InvalidNameError`] via `#[from]`.
    #[error("The .dist-info directory name contains invalid characters")]
    InvalidName(#[from] InvalidNameError),
    /// The `METADATA` contents failed to parse. Field 0 is a human-readable
    /// location used in the message, field 1 the boxed parse error.
    #[error("The metadata at {0} is invalid")]
    InvalidMetadata(String, Box<uv_pypi_types::MetadataError>),
    /// The CRC-32 computed while reading an archive entry differs from the
    /// value recorded for that entry.
    #[error("Bad CRC (got {computed:08x}, expected {expected:08x}) for file: {path}")]
    BadCrc32 {
        /// Name of the archive entry that was read.
        path: String,
        /// Checksum computed over the bytes that were read.
        computed: u32,
        /// Checksum recorded in the archive for the entry.
        expected: u32,
    },
    /// An error reported by `async_zip`; converted automatically via `#[from]`.
    #[error("Failed to read from zip file")]
    AsyncZip(#[from] async_zip::error::ZipError),
    /// An I/O error, displayed transparently. This variant has no `#[from]`,
    /// so call sites must convert explicitly (e.g. `map_err(Error::Io)`).
    #[error(transparent)]
    Io(io::Error),
}
48
49pub fn find_archive_dist_info<'a, T: Copy>(
55 filename: &WheelFilename,
56 files: impl Iterator<Item = (T, &'a str)>,
57) -> Result<(T, &'a str), Error> {
58 let metadatas: Vec<_> = files
59 .filter_map(|(payload, path)| {
60 let (dist_info_dir, file) = path.split_once('/')?;
61 if file != "METADATA" {
62 return None;
63 }
64 let dist_info_prefix = dist_info_dir.strip_suffix(".dist-info")?;
65 Some((payload, dist_info_prefix))
66 })
67 .collect();
68
69 let (payload, dist_info_prefix) = match metadatas[..] {
71 [] => {
72 return Err(Error::MissingDistInfo);
73 }
74 [(payload, path)] => (payload, path),
75 _ => {
76 return Err(Error::MultipleDistInfo(
77 metadatas
78 .into_iter()
79 .map(|(_, dist_info_dir)| dist_info_dir.to_string())
80 .collect::<Vec<_>>()
81 .join(", "),
82 ));
83 }
84 };
85
86 let normalized_prefix = DistInfoName::new(dist_info_prefix);
89 if !normalized_prefix
90 .as_ref()
91 .starts_with(filename.name.as_str())
92 {
93 return Err(Error::MissingDistInfoPackageName(
94 dist_info_prefix.to_string(),
95 filename.name.to_string(),
96 ));
97 }
98
99 Ok((payload, dist_info_prefix))
100}
101
102pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> Result<bool, Error> {
105 let Some((dist_info_dir, file)) = path.split_once('/') else {
106 return Ok(false);
107 };
108 if file != "METADATA" {
109 return Ok(false);
110 }
111 let Some(dist_info_prefix) = dist_info_dir.strip_suffix(".dist-info") else {
112 return Ok(false);
113 };
114
115 let normalized_prefix = DistInfoName::new(dist_info_prefix);
118 if !normalized_prefix
119 .as_ref()
120 .starts_with(filename.name.as_str())
121 {
122 return Err(Error::MissingDistInfoPackageName(
123 dist_info_prefix.to_string(),
124 filename.name.to_string(),
125 ));
126 }
127
128 Ok(true)
129}
130
131pub fn read_archive_metadata(
133 filename: &WheelFilename,
134 reader: impl std::io::BufRead + std::io::Seek + Unpin,
135) -> Result<Vec<u8>, Error> {
136 block_on(async {
137 let mut zip_reader =
138 async_zip::base::read::seek::ZipFileReader::new(AllowStdIo::new(reader)).await?;
139
140 let (metadata_index, _dist_info_prefix) = find_archive_dist_info(
141 filename,
142 zip_reader
143 .file()
144 .entries()
145 .iter()
146 .enumerate()
147 .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
148 )?;
149
150 let mut buffer = Vec::new();
151 zip_reader
152 .reader_with_entry(metadata_index)
153 .await?
154 .read_to_end_checked(&mut buffer)
155 .await?;
156
157 Ok(buffer)
158 })
159}
160
161pub fn find_flat_dist_info(
165 filename: &WheelFilename,
166 path: impl AsRef<Path>,
167) -> Result<String, Error> {
168 let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())
170 .map_err(Error::Io)?
171 .find_map(|entry| {
172 let entry = entry.ok()?;
173 let file_type = entry.file_type().ok()?;
174 if file_type.is_dir() {
175 let path = entry.path();
176
177 let extension = path.extension()?;
178 if extension != "dist-info" {
179 return None;
180 }
181
182 let dist_info_prefix = path.file_stem()?.to_str()?;
183 Some(dist_info_prefix.to_string())
184 } else {
185 None
186 }
187 })
188 else {
189 return Err(Error::MissingDistInfo);
190 };
191
192 let normalized_prefix = DistInfoName::new(&dist_info_prefix);
195 if !normalized_prefix
196 .as_ref()
197 .starts_with(filename.name.as_str())
198 {
199 return Err(Error::MissingDistInfoPackageName(
200 dist_info_prefix,
201 filename.name.to_string(),
202 ));
203 }
204
205 Ok(dist_info_prefix)
206}
207
208pub fn read_dist_info_metadata(
210 dist_info_prefix: &str,
211 wheel: impl AsRef<Path>,
212) -> Result<Vec<u8>, Error> {
213 let metadata_file = wheel
214 .as_ref()
215 .join(format!("{dist_info_prefix}.dist-info/METADATA"));
216 fs_err::read(metadata_file).map_err(Error::Io)
217}
218
219pub async fn read_metadata_async_seek(
221 filename: &WheelFilename,
222 reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
223) -> Result<Vec<u8>, Error> {
224 let reader = futures::io::BufReader::new(reader.compat());
225 let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader).await?;
226
227 let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
228 filename,
229 zip_reader
230 .file()
231 .entries()
232 .iter()
233 .enumerate()
234 .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
235 )?;
236
237 let mut contents = Vec::new();
239 zip_reader
240 .reader_with_entry(metadata_idx)
241 .await?
242 .read_to_end_checked(&mut contents)
243 .await?;
244
245 Ok(contents)
246}
247
/// Read and parse the `METADATA` file from a zipped wheel supplied as a
/// forward-only (non-seekable) stream.
///
/// Entries are visited in the order they appear in the stream. The first
/// entry accepted by [`is_metadata_entry`] is read fully, its CRC-32 is
/// compared against the recorded value, and its contents are parsed.
/// `debug_path` is only used to label [`Error::InvalidMetadata`].
///
/// # Errors
///
/// * [`Error::AsyncZip`] for zip-level failures, including an entry name that
///   cannot be viewed as a string.
/// * [`Error::MissingDistInfoPackageName`] from [`is_metadata_entry`].
/// * [`Error::Io`] if reading the entry contents fails.
/// * [`Error::BadCrc32`] if the computed checksum differs from a non-zero
///   recorded checksum.
/// * [`Error::InvalidMetadata`] if the contents fail to parse.
/// * [`Error::MissingDistInfo`] if the stream ends without a matching entry.
pub async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
    filename: &WheelFilename,
    debug_path: &str,
    reader: R,
) -> Result<ResolutionMetadata, Error> {
    // Buffer the incoming stream with a 128 KiB buffer.
    let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
    let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);

    while let Some(mut entry) = zip.next_with_entry().await? {
        // Copy the name out as an owned `String` so it can later be moved
        // into the CRC error while `entry` is still in use.
        let path = entry.reader().entry().filename().as_str()?.to_owned();

        if is_metadata_entry(&path, filename)? {
            // `.compat()` adapts the entry reader so that tokio's
            // `read_to_end` can be used on it.
            let mut reader = entry.reader_mut().compat();
            let mut contents = Vec::new();
            reader.read_to_end(&mut contents).await.map_err(Error::Io)?;

            // Unwrap the adapter again to reach the zip entry reader's
            // checksum accessors.
            let reader = reader.into_inner();
            let computed = reader.compute_hash();
            let expected = reader.entry().crc32();
            if computed != expected {
                let error = Error::BadCrc32 {
                    path,
                    computed,
                    expected,
                };
                // A recorded CRC of zero is treated as "no CRC available":
                // the mismatch is logged and the contents are still used.
                // NOTE(review): presumably this covers archives whose CRC is
                // not available at this point in a stream; confirm.
                if expected == 0 {
                    tracing::warn!("presumed missing CRC: {error}");
                } else {
                    return Err(error);
                }
            }

            let metadata = ResolutionMetadata::parse_metadata(&contents)
                .map_err(|err| Error::InvalidMetadata(debug_path.to_string(), Box::new(err)))?;
            return Ok(metadata);
        }

        // Not the metadata file: skip this entry and take the zip reader
        // back so the next iteration can advance to the following entry.
        (.., zip) = entry.skip().await?;
    }

    // The stream ended without any entry qualifying as the metadata file.
    Err(Error::MissingDistInfo)
}
301
302pub fn read_flat_wheel_metadata(
304 filename: &WheelFilename,
305 wheel: impl AsRef<Path>,
306) -> Result<ResolutionMetadata, Error> {
307 let dist_info_prefix = find_flat_dist_info(filename, &wheel)?;
308 let metadata = read_dist_info_metadata(&dist_info_prefix, &wheel)?;
309 ResolutionMetadata::parse_metadata(&metadata).map_err(|err| {
310 Error::InvalidMetadata(
311 format!("{dist_info_prefix}.dist-info/METADATA"),
312 Box::new(err),
313 )
314 })
315}
316
#[cfg(test)]
mod test {
    use super::find_archive_dist_info;
    use std::str::FromStr;
    use uv_distribution_filename::WheelFilename;

    /// A distribution name containing a dot (`Mastodon.py`) is still matched,
    /// and the prefix is returned exactly as it is spelled in the archive.
    #[test]
    fn test_dot_in_name() {
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();
        let archive_paths = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];

        // Use each path as its own payload.
        let entries = archive_paths.iter().map(|&path| (path, path));
        let (_, prefix) = find_archive_dist_info(&wheel, entries).unwrap();

        assert_eq!(prefix, "Mastodon.py-1.5.1");
    }
}