Skip to main content

debian_analyzer/
snapshot.rs

1//! Interacting with snapshot.debian.org
2use debversion::Version;
3use sha1::Digest;
4use std::collections::HashMap;
5use std::fs::File;
6use std::io::Read;
7use std::path::{Path, PathBuf};
8
9#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
10/// A struct representing a file in a snapshot
11struct FileInfo {
12    archive_name: String,
13
14    /// The date the file was first seen
15    first_seen: chrono::DateTime<chrono::Utc>,
16
17    /// The name of the file
18    name: String,
19    /// Path to the file
20    path: String,
21
22    /// The size of the file
23    size: usize,
24}
25
26#[derive(Debug)]
27/// An error that can occur while downloading a snapshot
28pub enum Error {
29    /// An error occurred while downloading a snapshot
30    SnapshotDownloadError(String, reqwest::Error, Option<bool>),
31
32    /// The snapshot is missing
33    SnapshotMissing(String, Version),
34
35    /// The hash of a file in the snapshot does not match the expected hash
36    SnapshotHashMismatch {
37        /// The filename of the file
38        filename: String,
39
40        /// The actual hash of the file
41        actual_hash: String,
42
43        /// The expected hash of the file
44        expected_hash: String,
45    },
46}
47
48impl std::fmt::Display for Error {
49    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
50        match self {
51            Error::SnapshotDownloadError(url, e, Some(true)) => {
52                write!(f, "Snapshot download error: {} (server error) {}", url, e)
53            }
54            Error::SnapshotDownloadError(url, e, _) => {
55                write!(f, "Snapshot download error: {} {}", url, e)
56            }
57            Error::SnapshotMissing(package, version) => {
58                write!(f, "Snapshot missing: {} {}", package, version)
59            }
60            Error::SnapshotHashMismatch {
61                filename,
62                actual_hash,
63                expected_hash,
64            } => {
65                write!(
66                    f,
67                    "Hash mismatch for {}: expected {} but got {}",
68                    filename, expected_hash, actual_hash
69                )
70            }
71        }
72    }
73}
74
75#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
76struct FileHash {
77    hash: String,
78}
79
80#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
81struct SrcFiles {
82    fileinfo: HashMap<String, Vec<FileInfo>>,
83
84    package: String,
85
86    version: Version,
87
88    result: Vec<FileHash>,
89
90    #[serde(rename = "_comment")]
91    comment: String,
92}
93
94/// Download a snapshot of a package
95pub fn download_snapshot(
96    package: &str,
97    version: &Version,
98    output_dir: &Path,
99) -> Result<PathBuf, Error> {
100    log::info!("Downloading {} {}", package, version);
101    let srcfiles_url = format!(
102        "https://snapshot.debian.org/mr/package/{}/{}/srcfiles?fileinfo=1",
103        package, version
104    );
105    let response = match reqwest::blocking::get(&srcfiles_url) {
106        Ok(response) => response,
107        Err(e) => match e.status() {
108            Some(reqwest::StatusCode::NOT_FOUND) => {
109                return Err(Error::SnapshotMissing(package.to_owned(), version.clone()));
110            }
111            Some(s) => {
112                return Err(Error::SnapshotDownloadError(
113                    srcfiles_url,
114                    e,
115                    if s.is_server_error() {
116                        Some(true)
117                    } else {
118                        None
119                    },
120                ));
121            }
122            None => {
123                return Err(Error::SnapshotDownloadError(srcfiles_url, e, None));
124            }
125        },
126    };
127    let srcfiles = response.json::<SrcFiles>().unwrap();
128
129    let mut files = HashMap::new();
130
131    for (hsh, entries) in srcfiles.fileinfo.iter() {
132        for entry in entries {
133            files.insert(entry.name.clone(), hsh.clone());
134        }
135    }
136
137    for (filename, hsh) in files.iter() {
138        let local_path = output_dir.join(filename);
139        if local_path.exists() {
140            let mut f = File::open(&local_path).unwrap();
141            let mut actual_hsh = sha1::Sha1::new();
142            let mut buf = [0u8; 8192];
143            loop {
144                let n = f.read(&mut buf).unwrap();
145                if n == 0 {
146                    break;
147                }
148                actual_hsh.update(&buf[..n]);
149            }
150            let actual_hsh = hex::encode(actual_hsh.finalize());
151            if actual_hsh != *hsh {
152                return Err(Error::SnapshotHashMismatch {
153                    filename: filename.clone(),
154                    actual_hash: actual_hsh,
155                    expected_hash: hsh.clone(),
156                });
157            }
158        } else {
159            let mut f = File::create(&local_path).unwrap();
160            let url = format!("https://snapshot.debian.org/file/{}", hsh);
161            log::info!("Downloading {} -> {}", url, filename);
162            let mut response = match reqwest::blocking::get(&url) {
163                Ok(response) => response,
164                Err(e) => match e.status() {
165                    Some(s) => {
166                        return Err(Error::SnapshotDownloadError(
167                            url,
168                            e,
169                            if s.is_server_error() {
170                                Some(true)
171                            } else {
172                                None
173                            },
174                        ));
175                    }
176                    None => {
177                        return Err(Error::SnapshotDownloadError(url, e, None));
178                    }
179                },
180            };
181            std::io::copy(&mut response, &mut f).unwrap();
182        }
183    }
184
185    let mut file_version = srcfiles.version;
186    file_version.epoch = None;
187    let dsc_filename = format!("{}_{}.dsc", srcfiles.package, file_version);
188    Ok(output_dir.join(&dsc_filename))
189}