1use futures::executor::block_on;
7use futures::io::AllowStdIo;
8use std::io;
9use std::path::Path;
10use thiserror::Error;
11use tokio::io::AsyncReadExt;
12use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
13use uv_distribution_filename::WheelFilename;
14use uv_normalize::{DistInfoName, InvalidNameError};
15use uv_pypi_types::ResolutionMetadata;
16
/// Errors raised while locating, reading, or parsing a wheel's `METADATA` file.
#[derive(Debug, Error)]
pub enum Error {
    /// NOTE(review): not constructed anywhere in the visible part of this file — confirm it is
    /// still used elsewhere.
    #[error("Failed to read `dist-info` metadata from built wheel")]
    DistInfo,
    /// No `<prefix>.dist-info/METADATA` archive entry, or no `*.dist-info` directory in an
    /// unpacked wheel, was found.
    #[error("No .dist-info directory found")]
    MissingDistInfo,
    /// More than one `<prefix>.dist-info/METADATA` entry was found; the payload is the
    /// comma-separated list of the offending prefixes.
    #[error("Multiple .dist-info directories found: {0}")]
    MultipleDistInfo(String),
    /// NOTE(review): not constructed anywhere in the visible part of this file — confirm it is
    /// still used elsewhere.
    #[error(
        "The .dist-info directory does not consist of the normalized package name and version: `{0}`"
    )]
    MissingDistInfoSegments(String),
    /// The `.dist-info` prefix (first field) does not start with the package name taken from the
    /// wheel filename (second field).
    #[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
    MissingDistInfoPackageName(String, String),
    /// Converted automatically from [`InvalidNameError`] via `#[from]`.
    #[error("The .dist-info directory name contains invalid characters")]
    InvalidName(#[from] InvalidNameError),
    /// The `METADATA` contents failed to parse; the first field is a caller-supplied description
    /// of where the metadata came from, the second is the underlying parse error.
    #[error("The metadata at {0} is invalid")]
    InvalidMetadata(String, Box<uv_pypi_types::MetadataError>),
    /// The checksum computed while streaming an entry differs from the CRC-32 recorded for it.
    #[error("Bad CRC (got {computed:08x}, expected {expected:08x}) for file: {path}")]
    BadCrc32 {
        /// Name of the archive entry that was read.
        path: String,
        /// Checksum computed over the bytes that were actually read.
        computed: u32,
        /// Checksum recorded for the entry in the archive.
        expected: u32,
    },
    /// Converted automatically from the zip reader's error type via `#[from]`.
    #[error("Failed to read from zip file")]
    AsyncZip(#[from] async_zip::error::ZipError),
    /// An I/O failure, displayed transparently. There is no `#[from]` conversion on this
    /// variant, so call sites wrap explicitly with `map_err(Error::Io)`.
    #[error(transparent)]
    Io(io::Error),
}
48
49pub fn find_archive_dist_info<'a, T: Copy>(
55 filename: &WheelFilename,
56 files: impl Iterator<Item = (T, &'a str)>,
57) -> Result<(T, &'a str), Error> {
58 let metadatas: Vec<_> = files
59 .filter_map(|(payload, path)| {
60 let (dist_info_dir, file) = path.split_once('/')?;
61 if file != "METADATA" {
62 return None;
63 }
64 let dist_info_prefix = dist_info_dir.strip_suffix(".dist-info")?;
65 Some((payload, dist_info_prefix))
66 })
67 .collect();
68
69 let (payload, dist_info_prefix) = match metadatas[..] {
71 [] => {
72 return Err(Error::MissingDistInfo);
73 }
74 [(payload, path)] => (payload, path),
75 _ => {
76 return Err(Error::MultipleDistInfo(
77 metadatas
78 .into_iter()
79 .map(|(_, dist_info_dir)| dist_info_dir.to_string())
80 .collect::<Vec<_>>()
81 .join(", "),
82 ));
83 }
84 };
85
86 let normalized_prefix = DistInfoName::new(dist_info_prefix);
89 if !normalized_prefix
90 .as_ref()
91 .starts_with(filename.name.as_str())
92 {
93 return Err(Error::MissingDistInfoPackageName(
94 dist_info_prefix.to_string(),
95 filename.name.to_string(),
96 ));
97 }
98
99 Ok((payload, dist_info_prefix))
100}
101
102fn is_metadata_entry(path: &str, filename: &WheelFilename) -> Result<bool, Error> {
105 let Some((dist_info_dir, file)) = path.split_once('/') else {
106 return Ok(false);
107 };
108 if file != "METADATA" {
109 return Ok(false);
110 }
111 let Some(dist_info_prefix) = dist_info_dir.strip_suffix(".dist-info") else {
112 return Ok(false);
113 };
114
115 let normalized_prefix = DistInfoName::new(dist_info_prefix);
118 if !normalized_prefix
119 .as_ref()
120 .starts_with(filename.name.as_str())
121 {
122 return Err(Error::MissingDistInfoPackageName(
123 dist_info_prefix.to_string(),
124 filename.name.to_string(),
125 ));
126 }
127
128 Ok(true)
129}
130
131pub fn read_archive_metadata(
133 filename: &WheelFilename,
134 reader: impl std::io::BufRead + std::io::Seek + Unpin,
135) -> Result<Vec<u8>, Error> {
136 block_on(async {
137 let mut zip_reader =
138 async_zip::base::read::seek::ZipFileReader::new(AllowStdIo::new(reader)).await?;
139
140 let (metadata_index, _dist_info_prefix) = find_archive_dist_info(
141 filename,
142 zip_reader
143 .file()
144 .entries()
145 .iter()
146 .enumerate()
147 .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
148 )?;
149
150 let mut buffer = Vec::new();
151 zip_reader
152 .reader_with_entry(metadata_index)
153 .await?
154 .read_to_end_checked(&mut buffer)
155 .await?;
156
157 Ok(buffer)
158 })
159}
160
161fn find_flat_dist_info(filename: &WheelFilename, path: impl AsRef<Path>) -> Result<String, Error> {
165 let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())
167 .map_err(Error::Io)?
168 .find_map(|entry| {
169 let entry = entry.ok()?;
170 let file_type = entry.file_type().ok()?;
171 if file_type.is_dir() {
172 let path = entry.path();
173
174 let extension = path.extension()?;
175 if extension != "dist-info" {
176 return None;
177 }
178
179 let dist_info_prefix = path.file_stem()?.to_str()?;
180 Some(dist_info_prefix.to_string())
181 } else {
182 None
183 }
184 })
185 else {
186 return Err(Error::MissingDistInfo);
187 };
188
189 let normalized_prefix = DistInfoName::new(&dist_info_prefix);
192 if !normalized_prefix
193 .as_ref()
194 .starts_with(filename.name.as_str())
195 {
196 return Err(Error::MissingDistInfoPackageName(
197 dist_info_prefix,
198 filename.name.to_string(),
199 ));
200 }
201
202 Ok(dist_info_prefix)
203}
204
205fn read_dist_info_metadata(
207 dist_info_prefix: &str,
208 wheel: impl AsRef<Path>,
209) -> Result<Vec<u8>, Error> {
210 let metadata_file = wheel
211 .as_ref()
212 .join(format!("{dist_info_prefix}.dist-info/METADATA"));
213 fs_err::read(metadata_file).map_err(Error::Io)
214}
215
216pub async fn read_metadata_async_seek(
218 filename: &WheelFilename,
219 reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
220) -> Result<Vec<u8>, Error> {
221 let reader = futures::io::BufReader::new(reader.compat());
222 let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader).await?;
223
224 let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
225 filename,
226 zip_reader
227 .file()
228 .entries()
229 .iter()
230 .enumerate()
231 .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
232 )?;
233
234 let mut contents = Vec::new();
236 zip_reader
237 .reader_with_entry(metadata_idx)
238 .await?
239 .read_to_end_checked(&mut contents)
240 .await?;
241
242 Ok(contents)
243}
244
/// Read and parse the `METADATA` file from a zipped wheel that is consumed as a forward-only
/// stream.
///
/// Entries are visited in the order the stream yields them; the first entry accepted by
/// [`is_metadata_entry`] is read, checksum-verified, parsed, and returned. `debug_path` is used
/// only to label [`Error::InvalidMetadata`].
///
/// # Errors
///
/// * [`Error::MissingDistInfo`] if the stream ends without a matching entry.
/// * [`Error::MissingDistInfoPackageName`] if a `.dist-info/METADATA` entry does not match the
///   package name from `filename` (propagated from [`is_metadata_entry`]).
/// * [`Error::BadCrc32`] if the computed checksum differs from a non-zero recorded CRC.
/// * [`Error::InvalidMetadata`] if the contents fail to parse.
/// * [`Error::Io`] if reading the entry's contents fails, and whatever the zip reader's own
///   errors convert into via `?` (presumably [`Error::AsyncZip`] — confirm against the
///   `async_zip` error types).
pub async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
    filename: &WheelFilename,
    debug_path: &str,
    reader: R,
) -> Result<ResolutionMetadata, Error> {
    // Buffer the input with a 128 KiB buffer before handing it to the streaming zip reader.
    let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
    let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);

    // `next_with_entry` takes `zip` by value; it is handed back by `skip()` at the bottom of
    // the loop.
    while let Some(mut entry) = zip.next_with_entry().await? {
        // Own the entry name so it can be moved into the error below while `entry` is
        // mutably borrowed.
        let path = entry.reader().entry().filename().as_str()?.to_owned();

        if is_metadata_entry(&path, filename)? {
            // Bridge the entry reader so that `read_to_end` from `tokio::io::AsyncReadExt`
            // can be used on it.
            let mut reader = entry.reader_mut().compat();
            let mut contents = Vec::new();
            reader.read_to_end(&mut contents).await.map_err(Error::Io)?;

            // Compare the checksum accumulated while reading against the CRC recorded for
            // the entry.
            let reader = reader.into_inner();
            let computed = reader.compute_hash();
            let expected = reader.entry().crc32();
            if computed != expected {
                let error = Error::BadCrc32 {
                    path,
                    computed,
                    expected,
                };
                // A recorded CRC of zero is treated as "not present": log a warning and
                // carry on instead of failing.
                if expected == 0 {
                    tracing::warn!("presumed missing CRC: {error}");
                } else {
                    return Err(error);
                }
            }

            let metadata = ResolutionMetadata::parse_metadata(&contents)
                .map_err(|err| Error::InvalidMetadata(debug_path.to_string(), Box::new(err)))?;
            return Ok(metadata);
        }

        // Not the metadata entry: skip it and take the archive reader back for the next
        // iteration.
        (.., zip) = entry.skip().await?;
    }

    Err(Error::MissingDistInfo)
}
298
299pub fn read_flat_wheel_metadata(
301 filename: &WheelFilename,
302 wheel: impl AsRef<Path>,
303) -> Result<ResolutionMetadata, Error> {
304 let dist_info_prefix = find_flat_dist_info(filename, &wheel)?;
305 let metadata = read_dist_info_metadata(&dist_info_prefix, &wheel)?;
306 ResolutionMetadata::parse_metadata(&metadata).map_err(|err| {
307 Error::InvalidMetadata(
308 format!("{dist_info_prefix}.dist-info/METADATA"),
309 Box::new(err),
310 )
311 })
312}
313
#[cfg(test)]
mod test {
    use super::find_archive_dist_info;
    use std::str::FromStr;
    use uv_distribution_filename::WheelFilename;

    /// A project name containing a dot must still resolve to its `.dist-info` directory.
    #[test]
    fn test_dot_in_name() {
        let archive_paths = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl")
            .expect("wheel filename should parse");

        // The path doubles as the payload; only the returned prefix is checked.
        let entries = archive_paths.iter().copied().map(|path| (path, path));
        let (_, prefix) = find_archive_dist_info(&wheel, entries)
            .expect("exactly one matching .dist-info directory");

        assert_eq!(prefix, "Mastodon.py-1.5.1");
    }
}