debian_analyzer/
snapshot.rs

1//! Interacting with snapshot.debian.org
2use debversion::Version;
3use sha1::Digest;
4use std::collections::HashMap;
5use std::fs::File;
6use std::path::{Path, PathBuf};
7
8#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
9/// A struct representing a file in a snapshot
10struct FileInfo {
11    archive_name: String,
12
13    /// The date the file was first seen
14    first_seen: chrono::DateTime<chrono::Utc>,
15
16    /// The name of the file
17    name: String,
18    /// Path to the file
19    path: String,
20
21    /// The size of the file
22    size: usize,
23}
24
25#[derive(Debug)]
26/// An error that can occur while downloading a snapshot
27pub enum Error {
28    /// An error occurred while downloading a snapshot
29    SnapshotDownloadError(String, reqwest::Error, Option<bool>),
30
31    /// The snapshot is missing
32    SnapshotMissing(String, Version),
33
34    /// The hash of a file in the snapshot does not match the expected hash
35    SnapshotHashMismatch {
36        /// The filename of the file
37        filename: String,
38
39        /// The actual hash of the file
40        actual_hash: String,
41
42        /// The expected hash of the file
43        expected_hash: String,
44    },
45}
46
47impl std::fmt::Display for Error {
48    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
49        match self {
50            Error::SnapshotDownloadError(url, e, Some(true)) => {
51                write!(f, "Snapshot download error: {} (server error) {}", url, e)
52            }
53            Error::SnapshotDownloadError(url, e, _) => {
54                write!(f, "Snapshot download error: {} {}", url, e)
55            }
56            Error::SnapshotMissing(package, version) => {
57                write!(f, "Snapshot missing: {} {}", package, version)
58            }
59            Error::SnapshotHashMismatch {
60                filename,
61                actual_hash,
62                expected_hash,
63            } => {
64                write!(
65                    f,
66                    "Hash mismatch for {}: expected {} but got {}",
67                    filename, expected_hash, actual_hash
68                )
69            }
70        }
71    }
72}
73
74#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
75struct FileHash {
76    hash: String,
77}
78
79#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
80struct SrcFiles {
81    fileinfo: HashMap<String, Vec<FileInfo>>,
82
83    package: String,
84
85    version: Version,
86
87    result: Vec<FileHash>,
88
89    #[serde(rename = "_comment")]
90    comment: String,
91}
92
93/// Download a snapshot of a package
94pub fn download_snapshot(
95    package: &str,
96    version: &Version,
97    output_dir: &Path,
98) -> Result<PathBuf, Error> {
99    log::info!("Downloading {} {}", package, version);
100    let srcfiles_url = format!(
101        "https://snapshot.debian.org/mr/package/{}/{}/srcfiles?fileinfo=1",
102        package, version
103    );
104    let response = match reqwest::blocking::get(&srcfiles_url) {
105        Ok(response) => response,
106        Err(e) => match e.status() {
107            Some(reqwest::StatusCode::NOT_FOUND) => {
108                return Err(Error::SnapshotMissing(package.to_owned(), version.clone()));
109            }
110            Some(s) => {
111                return Err(Error::SnapshotDownloadError(
112                    srcfiles_url,
113                    e,
114                    if s.is_server_error() {
115                        Some(true)
116                    } else {
117                        None
118                    },
119                ));
120            }
121            None => {
122                return Err(Error::SnapshotDownloadError(srcfiles_url, e, None));
123            }
124        },
125    };
126    let srcfiles = response.json::<SrcFiles>().unwrap();
127
128    let mut files = HashMap::new();
129
130    for (hsh, entries) in srcfiles.fileinfo.iter() {
131        for entry in entries {
132            files.insert(entry.name.clone(), hsh.clone());
133        }
134    }
135
136    for (filename, hsh) in files.iter() {
137        let local_path = output_dir.join(filename);
138        if local_path.exists() {
139            let mut f = File::open(&local_path).unwrap();
140            let mut actual_hsh = sha1::Sha1::new();
141            std::io::copy(&mut f, &mut actual_hsh).unwrap();
142            let actual_hsh = hex::encode(actual_hsh.finalize());
143            if actual_hsh != *hsh {
144                return Err(Error::SnapshotHashMismatch {
145                    filename: filename.clone(),
146                    actual_hash: actual_hsh,
147                    expected_hash: hsh.clone(),
148                });
149            }
150        } else {
151            let mut f = File::create(&local_path).unwrap();
152            let url = format!("https://snapshot.debian.org/file/{}", hsh);
153            log::info!("Downloading {} -> {}", url, filename);
154            let mut response = match reqwest::blocking::get(&url) {
155                Ok(response) => response,
156                Err(e) => match e.status() {
157                    Some(s) => {
158                        return Err(Error::SnapshotDownloadError(
159                            url,
160                            e,
161                            if s.is_server_error() {
162                                Some(true)
163                            } else {
164                                None
165                            },
166                        ));
167                    }
168                    None => {
169                        return Err(Error::SnapshotDownloadError(url, e, None));
170                    }
171                },
172            };
173            std::io::copy(&mut response, &mut f).unwrap();
174        }
175    }
176
177    let mut file_version = srcfiles.version;
178    file_version.epoch = None;
179    let dsc_filename = format!("{}_{}.dsc", srcfiles.package, file_version);
180    Ok(output_dir.join(&dsc_filename))
181}