1use std::io;
7use std::io::{Read, Seek};
8use std::path::Path;
9use thiserror::Error;
10use tokio::io::AsyncReadExt;
11use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
12use uv_distribution_filename::WheelFilename;
13use uv_normalize::{DistInfoName, InvalidNameError};
14use uv_pypi_types::ResolutionMetadata;
15use zip::ZipArchive;
16
/// Errors raised while locating or reading the `METADATA` file of a wheel.
#[derive(Debug, Error)]
pub enum Error {
    /// Generic failure to read `dist-info` metadata from a built wheel.
    #[error("Failed to read `dist-info` metadata from built wheel")]
    DistInfo,
    /// No `<prefix>.dist-info/METADATA` entry (or directory) was found.
    #[error("No .dist-info directory found")]
    MissingDistInfo,
    /// More than one `.dist-info` directory was found; the payload is the comma-separated list
    /// of the directory prefixes that were found.
    #[error("Multiple .dist-info directories found: {0}")]
    MultipleDistInfo(String),
    /// The `.dist-info` directory name is not of the form `<name>-<version>`; the payload is the
    /// offending name.
    #[error(
        "The .dist-info directory does not consist of the normalized package name and version: `{0}`"
    )]
    MissingDistInfoSegments(String),
    /// The normalized `.dist-info` prefix (first field) does not start with the package name
    /// taken from the wheel filename (second field).
    #[error("The .dist-info directory {0} does not start with the normalized package name: {1}")]
    MissingDistInfoPackageName(String, String),
    /// The `.dist-info` directory name could not be interpreted as a valid name.
    #[error("The .dist-info directory name contains invalid characters")]
    InvalidName(#[from] InvalidNameError),
    /// The `METADATA` contents failed to parse; the first field identifies where the metadata
    /// came from, the second is the underlying parse error.
    #[error("The metadata at {0} is invalid")]
    InvalidMetadata(String, Box<uv_pypi_types::MetadataError>),
    /// The CRC-32 computed over an extracted entry differs from the one recorded in the archive.
    #[error("Bad CRC (got {computed:08x}, expected {expected:08x}) for file: {path}")]
    BadCrc32 {
        /// Path of the entry inside the archive.
        path: String,
        /// Checksum computed from the bytes that were actually read.
        computed: u32,
        /// Checksum recorded in the archive for this entry.
        expected: u32,
    },
    /// Error reported by the synchronous `zip` reader.
    #[error("Failed to read from zip file")]
    Zip(#[from] zip::result::ZipError),
    /// Error reported by the asynchronous `async_zip` reader.
    #[error("Failed to read from zip file")]
    AsyncZip(#[from] async_zip::error::ZipError),
    /// An I/O failure, displayed transparently as the underlying [`io::Error`].
    ///
    /// This variant has no `#[from]`, so call sites must convert explicitly with
    /// `map_err(Error::Io)`.
    #[error(transparent)]
    Io(io::Error),
}
50
51pub fn find_archive_dist_info<'a, T: Copy>(
57 filename: &WheelFilename,
58 files: impl Iterator<Item = (T, &'a str)>,
59) -> Result<(T, &'a str), Error> {
60 let metadatas: Vec<_> = files
61 .filter_map(|(payload, path)| {
62 let (dist_info_dir, file) = path.split_once('/')?;
63 if file != "METADATA" {
64 return None;
65 }
66 let dist_info_prefix = dist_info_dir.strip_suffix(".dist-info")?;
67 Some((payload, dist_info_prefix))
68 })
69 .collect();
70
71 let (payload, dist_info_prefix) = match metadatas[..] {
73 [] => {
74 return Err(Error::MissingDistInfo);
75 }
76 [(payload, path)] => (payload, path),
77 _ => {
78 return Err(Error::MultipleDistInfo(
79 metadatas
80 .into_iter()
81 .map(|(_, dist_info_dir)| dist_info_dir.to_string())
82 .collect::<Vec<_>>()
83 .join(", "),
84 ));
85 }
86 };
87
88 let normalized_prefix = DistInfoName::new(dist_info_prefix);
91 if !normalized_prefix
92 .as_ref()
93 .starts_with(filename.name.as_str())
94 {
95 return Err(Error::MissingDistInfoPackageName(
96 dist_info_prefix.to_string(),
97 filename.name.to_string(),
98 ));
99 }
100
101 Ok((payload, dist_info_prefix))
102}
103
104pub fn is_metadata_entry(path: &str, filename: &WheelFilename) -> Result<bool, Error> {
107 let Some((dist_info_dir, file)) = path.split_once('/') else {
108 return Ok(false);
109 };
110 if file != "METADATA" {
111 return Ok(false);
112 }
113 let Some(dist_info_prefix) = dist_info_dir.strip_suffix(".dist-info") else {
114 return Ok(false);
115 };
116
117 let normalized_prefix = DistInfoName::new(dist_info_prefix);
120 if !normalized_prefix
121 .as_ref()
122 .starts_with(filename.name.as_str())
123 {
124 return Err(Error::MissingDistInfoPackageName(
125 dist_info_prefix.to_string(),
126 filename.name.to_string(),
127 ));
128 }
129
130 Ok(true)
131}
132
133pub fn read_archive_metadata(
135 filename: &WheelFilename,
136 archive: &mut ZipArchive<impl Read + Seek + Sized>,
137) -> Result<Vec<u8>, Error> {
138 let dist_info_prefix =
139 find_archive_dist_info(filename, archive.file_names().map(|name| (name, name)))?.1;
140
141 let mut file = archive.by_name(&format!("{dist_info_prefix}.dist-info/METADATA"))?;
142
143 #[allow(clippy::cast_possible_truncation)]
144 let mut buffer = Vec::with_capacity(file.size() as usize);
145 file.read_to_end(&mut buffer).map_err(Error::Io)?;
146
147 Ok(buffer)
148}
149
150pub fn find_flat_dist_info(
154 filename: &WheelFilename,
155 path: impl AsRef<Path>,
156) -> Result<String, Error> {
157 let Some(dist_info_prefix) = fs_err::read_dir(path.as_ref())
159 .map_err(Error::Io)?
160 .find_map(|entry| {
161 let entry = entry.ok()?;
162 let file_type = entry.file_type().ok()?;
163 if file_type.is_dir() {
164 let path = entry.path();
165
166 let extension = path.extension()?;
167 if extension != "dist-info" {
168 return None;
169 }
170
171 let dist_info_prefix = path.file_stem()?.to_str()?;
172 Some(dist_info_prefix.to_string())
173 } else {
174 None
175 }
176 })
177 else {
178 return Err(Error::MissingDistInfo);
179 };
180
181 let normalized_prefix = DistInfoName::new(&dist_info_prefix);
184 if !normalized_prefix
185 .as_ref()
186 .starts_with(filename.name.as_str())
187 {
188 return Err(Error::MissingDistInfoPackageName(
189 dist_info_prefix,
190 filename.name.to_string(),
191 ));
192 }
193
194 Ok(dist_info_prefix)
195}
196
197pub fn read_dist_info_metadata(
199 dist_info_prefix: &str,
200 wheel: impl AsRef<Path>,
201) -> Result<Vec<u8>, Error> {
202 let metadata_file = wheel
203 .as_ref()
204 .join(format!("{dist_info_prefix}.dist-info/METADATA"));
205 fs_err::read(metadata_file).map_err(Error::Io)
206}
207
208pub async fn read_metadata_async_seek(
210 filename: &WheelFilename,
211 reader: impl tokio::io::AsyncRead + tokio::io::AsyncSeek + Unpin,
212) -> Result<Vec<u8>, Error> {
213 let reader = futures::io::BufReader::new(reader.compat());
214 let mut zip_reader = async_zip::base::read::seek::ZipFileReader::new(reader).await?;
215
216 let (metadata_idx, _dist_info_prefix) = find_archive_dist_info(
217 filename,
218 zip_reader
219 .file()
220 .entries()
221 .iter()
222 .enumerate()
223 .filter_map(|(index, entry)| Some((index, entry.filename().as_str().ok()?))),
224 )?;
225
226 let mut contents = Vec::new();
228 zip_reader
229 .reader_with_entry(metadata_idx)
230 .await?
231 .read_to_end_checked(&mut contents)
232 .await?;
233
234 Ok(contents)
235}
236
237pub async fn read_metadata_async_stream<R: futures::AsyncRead + Unpin>(
239 filename: &WheelFilename,
240 debug_path: &str,
241 reader: R,
242) -> Result<ResolutionMetadata, Error> {
243 let reader = futures::io::BufReader::with_capacity(128 * 1024, reader);
244 let mut zip = async_zip::base::read::stream::ZipFileReader::new(reader);
245
246 while let Some(mut entry) = zip.next_with_entry().await? {
247 let path = entry.reader().entry().filename().as_str()?.to_owned();
249
250 if is_metadata_entry(&path, filename)? {
251 let mut reader = entry.reader_mut().compat();
252 let mut contents = Vec::new();
253 reader.read_to_end(&mut contents).await.unwrap();
254
255 let reader = reader.into_inner();
258 let computed = reader.compute_hash();
259 let expected = reader.entry().crc32();
260 if computed != expected {
261 let error = Error::BadCrc32 {
262 path,
263 computed,
264 expected,
265 };
266 if expected == 0 {
272 tracing::warn!("presumed missing CRC: {error}");
273 } else {
274 return Err(error);
275 }
276 }
277
278 let metadata = ResolutionMetadata::parse_metadata(&contents)
279 .map_err(|err| Error::InvalidMetadata(debug_path.to_string(), Box::new(err)))?;
280 return Ok(metadata);
281 }
282
283 (.., zip) = entry.skip().await?;
286 }
287
288 Err(Error::MissingDistInfo)
289}
290
291pub fn read_flat_wheel_metadata(
293 filename: &WheelFilename,
294 wheel: impl AsRef<Path>,
295) -> Result<ResolutionMetadata, Error> {
296 let dist_info_prefix = find_flat_dist_info(filename, &wheel)?;
297 let metadata = read_dist_info_metadata(&dist_info_prefix, &wheel)?;
298 ResolutionMetadata::parse_metadata(&metadata).map_err(|err| {
299 Error::InvalidMetadata(
300 format!("{dist_info_prefix}.dist-info/METADATA"),
301 Box::new(err),
302 )
303 })
304}
305
#[cfg(test)]
mod test {
    use super::find_archive_dist_info;
    use std::str::FromStr;
    use uv_distribution_filename::WheelFilename;

    /// A `.` inside the distribution name must not prevent the `.dist-info` directory from
    /// being found.
    #[test]
    fn test_dot_in_name() {
        let wheel = WheelFilename::from_str("Mastodon.py-1.5.1-py2.py3-none-any.whl").unwrap();
        let entries = [
            "mastodon/Mastodon.py",
            "mastodon/__init__.py",
            "mastodon/streaming.py",
            "Mastodon.py-1.5.1.dist-info/DESCRIPTION.rst",
            "Mastodon.py-1.5.1.dist-info/metadata.json",
            "Mastodon.py-1.5.1.dist-info/top_level.txt",
            "Mastodon.py-1.5.1.dist-info/WHEEL",
            "Mastodon.py-1.5.1.dist-info/METADATA",
            "Mastodon.py-1.5.1.dist-info/RECORD",
        ];

        let located =
            find_archive_dist_info(&wheel, entries.iter().map(|&entry| (entry, entry)));
        let (_, dist_info_prefix) = located.unwrap();

        assert_eq!(dist_info_prefix, "Mastodon.py-1.5.1");
    }
}