debian_analyzer/
snapshot.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//! Interacting with snapshot.debian.org
use debversion::Version;
use sha1::Digest;
use std::collections::HashMap;
use std::fs::File;
use std::path::{Path, PathBuf};

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
/// Metadata about one file of a source package, as listed in the
/// `fileinfo` map of a snapshot.debian.org `srcfiles` response.
///
/// Only `name` is read by this module; the remaining fields are kept so
/// that the JSON document deserializes and round-trips.
struct FileInfo {
    /// Name of the archive the file was recorded in
    /// (NOTE(review): meaning inferred from the field name — confirm
    /// against the snapshot.debian.org API documentation)
    archive_name: String,

    /// The date the file was first seen
    first_seen: chrono::DateTime<chrono::Utc>,

    /// The name of the file; used as the local file name inside the
    /// output directory when the file is downloaded
    name: String,
    /// Path to the file
    path: String,

    /// The size of the file (presumably in bytes — TODO confirm)
    size: usize,
}

#[derive(Debug)]
/// An error that can occur while downloading a snapshot
pub enum Error {
    /// An HTTP request made while downloading a snapshot failed.
    ///
    /// The fields are, in order:
    /// 1. the URL that was being requested,
    /// 2. the underlying `reqwest` error,
    /// 3. a server-error flag: `Some(true)` when the error carried an HTTP
    ///    status for which `is_server_error()` holds, `None` otherwise.
    SnapshotDownloadError(String, reqwest::Error, Option<bool>),

    /// The snapshot is missing: the service answered "not found" for the
    /// given package name (first field) and version (second field)
    SnapshotMissing(String, Version),

    /// The hash of a file in the snapshot does not match the expected hash
    SnapshotHashMismatch {
        /// The filename of the file
        filename: String,

        /// The actual hash of the file (hex-encoded SHA-1 of the local copy)
        actual_hash: String,

        /// The expected hash of the file, as reported by the snapshot service
        expected_hash: String,
    },
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        match self {
            Error::SnapshotDownloadError(url, e, Some(true)) => {
                write!(f, "Snapshot download error: {} (server error) {}", url, e)
            }
            Error::SnapshotDownloadError(url, e, _) => {
                write!(f, "Snapshot download error: {} {}", url, e)
            }
            Error::SnapshotMissing(package, version) => {
                write!(f, "Snapshot missing: {} {}", package, version)
            }
            Error::SnapshotHashMismatch {
                filename,
                actual_hash,
                expected_hash,
            } => {
                write!(
                    f,
                    "Hash mismatch for {}: expected {} but got {}",
                    filename, expected_hash, actual_hash
                )
            }
        }
    }
}

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
/// One entry of the `result` list in a `srcfiles` response.
struct FileHash {
    /// Hash identifying a file
    /// (NOTE(review): presumably the same hex SHA-1 used as a key of
    /// `SrcFiles::fileinfo` — confirm against the API documentation)
    hash: String,
}

#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
/// The JSON document returned by the snapshot.debian.org
/// `/mr/package/<package>/<version>/srcfiles?fileinfo=1` endpoint.
struct SrcFiles {
    /// Per-file metadata, keyed by the hash under which the file can be
    /// fetched from `https://snapshot.debian.org/file/<hash>`
    fileinfo: HashMap<String, Vec<FileInfo>>,

    /// Name of the source package
    package: String,

    /// Version of the source package
    version: Version,

    /// List of file hashes making up the source package
    /// (not read by this module)
    result: Vec<FileHash>,

    /// Free-form comment; stored under the `_comment` key in the JSON
    #[serde(rename = "_comment")]
    comment: String,
}

/// Download a snapshot of a package
pub fn download_snapshot(
    package: &str,
    version: &Version,
    output_dir: &Path,
) -> Result<PathBuf, Error> {
    log::info!("Downloading {} {}", package, version);
    let srcfiles_url = format!(
        "https://snapshot.debian.org/mr/package/{}/{}/srcfiles?fileinfo=1",
        package, version
    );
    let response = match reqwest::blocking::get(&srcfiles_url) {
        Ok(response) => response,
        Err(e) => match e.status() {
            Some(reqwest::StatusCode::NOT_FOUND) => {
                return Err(Error::SnapshotMissing(package.to_owned(), version.clone()));
            }
            Some(s) => {
                return Err(Error::SnapshotDownloadError(
                    srcfiles_url,
                    e,
                    if s.is_server_error() {
                        Some(true)
                    } else {
                        None
                    },
                ));
            }
            None => {
                return Err(Error::SnapshotDownloadError(srcfiles_url, e, None));
            }
        },
    };
    let srcfiles = response.json::<SrcFiles>().unwrap();

    let mut files = HashMap::new();

    for (hsh, entries) in srcfiles.fileinfo.iter() {
        for entry in entries {
            files.insert(entry.name.clone(), hsh.clone());
        }
    }

    for (filename, hsh) in files.iter() {
        let local_path = output_dir.join(filename);
        if local_path.exists() {
            let mut f = File::open(&local_path).unwrap();
            let mut actual_hsh = sha1::Sha1::new();
            std::io::copy(&mut f, &mut actual_hsh).unwrap();
            let actual_hsh = hex::encode(actual_hsh.finalize());
            if actual_hsh != *hsh {
                return Err(Error::SnapshotHashMismatch {
                    filename: filename.clone(),
                    actual_hash: actual_hsh,
                    expected_hash: hsh.clone(),
                });
            }
        } else {
            let mut f = File::create(&local_path).unwrap();
            let url = format!("https://snapshot.debian.org/file/{}", hsh);
            log::info!("Downloading {} -> {}", url, filename);
            let mut response = match reqwest::blocking::get(&url) {
                Ok(response) => response,
                Err(e) => match e.status() {
                    Some(s) => {
                        return Err(Error::SnapshotDownloadError(
                            url,
                            e,
                            if s.is_server_error() {
                                Some(true)
                            } else {
                                None
                            },
                        ));
                    }
                    None => {
                        return Err(Error::SnapshotDownloadError(url, e, None));
                    }
                },
            };
            std::io::copy(&mut response, &mut f).unwrap();
        }
    }

    let mut file_version = srcfiles.version;
    file_version.epoch = None;
    let dsc_filename = format!("{}_{}.dsc", srcfiles.package, file_version);
    Ok(output_dir.join(&dsc_filename))
}