Skip to main content

modde_sources/gdrive/
mod.rs

//! Google Drive download source, including handling of the large-file virus
//! scan confirmation page.
3
4use std::collections::HashMap;
5use std::path::Path;
6
7use reqwest::Client;
8use tracing::debug;
9
10use modde_core::manifest::wabbajack::DownloadDirective;
11
12use crate::common::{ensure_parent, stream_to_file_verified};
13use crate::error::{SourceError, SourceResult, status_error};
14use crate::traits::{DownloadHandle, DownloadSource, ProgressCallback, VerifiedFile};
15
16/// Google Drive download source.
17///
18/// Handles the virus scan warning page for large files.
19pub struct GoogleDriveSource {
20    client: Client,
21}
22
23impl GoogleDriveSource {
24    /// Create a source that downloads over the given HTTP `client`.
25    #[must_use]
26    pub fn new(client: Client) -> Self {
27        Self { client }
28    }
29}
30
31impl DownloadSource for GoogleDriveSource {
32    fn can_handle(&self, directive: &DownloadDirective) -> bool {
33        matches!(directive, DownloadDirective::GoogleDrive { .. })
34    }
35
36    async fn resolve(&self, directive: &DownloadDirective) -> SourceResult<DownloadHandle> {
37        let DownloadDirective::GoogleDrive { id, hash } = directive else {
38            return Err(SourceError::other(anyhow::anyhow!(
39                "not a Google Drive directive"
40            )));
41        };
42
43        // Modern Google Drive flow: hit the usercontent host directly with confirm=t,
44        // which skips the interstitial virus-scan warning entirely. Falling back to
45        // the legacy `drive.google.com/uc?...` URL is left to the HTML extraction
46        // path inside `do_download` for old links that still serve the warning page.
47        let url = format!(
48            "https://drive.usercontent.google.com/download?id={id}&export=download&authuser=0&confirm=t"
49        );
50
51        Ok(DownloadHandle {
52            url,
53            candidate_urls: Vec::new(),
54            headers: HashMap::new(),
55            expected_hash: *hash,
56            size_hint: None,
57        })
58    }
59
60    async fn download_with_progress(
61        &self,
62        handle: DownloadHandle,
63        dest: &Path,
64        progress: ProgressCallback,
65    ) -> SourceResult<VerifiedFile> {
66        ensure_parent(dest).await?;
67
68        do_download(&self.client, &handle, dest, &progress).await
69    }
70}
71
72async fn do_download(
73    client: &Client,
74    handle: &DownloadHandle,
75    dest: &Path,
76    progress: &ProgressCallback,
77) -> SourceResult<VerifiedFile> {
78    let resp = status_error(client.get(&handle.url).send().await?)?;
79    let content_type = resp
80        .headers()
81        .get("content-type")
82        .and_then(|v| v.to_str().ok())
83        .unwrap_or("")
84        .to_string();
85
86    // If Google returns HTML, it's the virus scan warning page
87    if content_type.contains("text/html") {
88        debug!("got virus scan warning page, extracting confirm token");
89        let body = resp.text().await?;
90        let confirm_token = extract_confirm_token(&body).ok_or_else(|| {
91            SourceError::other(anyhow::anyhow!(
92                "failed to extract confirm token from virus scan page"
93            ))
94        })?;
95
96        let confirmed_url = format!("{}&confirm={confirm_token}", handle.url);
97        let resp = status_error(client.get(&confirmed_url).send().await?)?;
98        return stream_to_file_verified(
99            resp,
100            dest,
101            handle.expected_hash,
102            handle.size_hint.unwrap_or(0),
103            progress,
104        )
105        .await;
106    }
107    stream_to_file_verified(
108        resp,
109        dest,
110        handle.expected_hash,
111        handle.size_hint.unwrap_or(0),
112        progress,
113    )
114    .await
115}
116
/// Extract the confirm token from Google Drive's virus scan warning HTML.
///
/// Three patterns are tried in order and the first non-empty token wins:
///
/// 1. the first `confirm=TOKEN` in the page, ended by `&`, `"`, `'` or
///    whitespace;
/// 2. `value="TOKEN"` following the first `name="confirm"`;
/// 3. `confirm=TOKEN` following `id="uc-download-link"`, ended by `&`, `"`
///    or `'`.
///
/// A pattern whose token is empty or has no terminator counts as "no match"
/// and the next pattern is tried; it does not abort the whole search.
/// Returns `None` when no pattern yields a token.
fn extract_confirm_token(html: &str) -> Option<String> {
    const QUERY_KEY: &str = "confirm=";

    /// Non-empty prefix of `text` up to the first char accepted by `is_end`,
    /// or `None` when there is no such char or the prefix is empty.
    fn token_before(text: &str, is_end: impl Fn(char) -> bool) -> Option<&str> {
        let end = text.find(is_end)?;
        Some(&text[..end]).filter(|token| !token.is_empty())
    }

    // Pattern 1: &confirm=TOKEN& anywhere in the page.
    html.split_once(QUERY_KEY)
        .and_then(|(_, rest)| {
            token_before(rest, |c| matches!(c, '&' | '"' | '\'') || c.is_whitespace())
        })
        // Pattern 2: name="confirm" value="TOKEN"
        .or_else(|| {
            let start = html.find("name=\"confirm\"")?;
            let (_, value) = html[start..].split_once("value=\"")?;
            token_before(value, |c| c == '"')
        })
        // Pattern 3: id="uc-download-link" with href containing confirm=
        .or_else(|| {
            let start = html.find("id=\"uc-download-link\"")?;
            let (_, rest) = html[start..].split_once(QUERY_KEY)?;
            token_before(rest, |c| matches!(c, '&' | '"' | '\''))
        })
        .map(str::to_owned)
}
155
#[cfg(test)]
mod tests {
    use super::*;
    use modde_core::GameId;

    /// Fresh source backed by a default HTTP client, for the `can_handle` tests.
    fn gdrive_source() -> GoogleDriveSource {
        GoogleDriveSource::new(Client::new())
    }

    // ── extract_confirm_token: pattern 1, `confirm=TOKEN` in a URL ───────

    #[test]
    fn confirm_token_pattern1_ampersand_delimited() {
        let html =
            r#"<a href="https://drive.google.com/uc?id=ID&confirm=t&export=download">Download</a>"#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("t"));
    }

    #[test]
    fn confirm_token_pattern1_long_token() {
        let html = r"something confirm=AbCdEfGh1234&rest";
        assert_eq!(extract_confirm_token(html).as_deref(), Some("AbCdEfGh1234"));
    }

    #[test]
    fn confirm_token_pattern1_quote_delimited() {
        let html = r#"href="https://example.com?confirm=mytoken""#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("mytoken"));
    }

    #[test]
    fn confirm_token_pattern1_single_quote_delimited() {
        let html = r"href='https://example.com?confirm=tok123'";
        assert_eq!(extract_confirm_token(html).as_deref(), Some("tok123"));
    }

    #[test]
    fn confirm_token_pattern1_whitespace_delimited() {
        let html = "url?confirm=TOKEN rest of text";
        assert_eq!(extract_confirm_token(html).as_deref(), Some("TOKEN"));
    }

    // ── extract_confirm_token: pattern 2, hidden `confirm` input ─────────

    #[test]
    fn confirm_token_pattern2_input_field() {
        let html = r#"<input type="hidden" name="confirm" value="SecretVal"><input type="submit">"#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("SecretVal"));
    }

    #[test]
    fn confirm_token_pattern2_with_extra_attrs() {
        let html = r#"<input class="foo" name="confirm" id="bar" value="TOKEN42">"#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("TOKEN42"));
    }

    // ── extract_confirm_token: pattern 3, `uc-download-link` anchor ──────

    #[test]
    fn confirm_token_pattern3_uc_download_link() {
        let html = r#"<a id="uc-download-link" href="/uc?export=download&confirm=XyZ123&id=abc">Download anyway</a>"#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("XyZ123"));
    }

    #[test]
    fn confirm_token_pattern3_uc_download_link_quote_end() {
        let html = r#"<a id="uc-download-link" href="/uc?export=download&confirm=TOK">"#;
        assert_eq!(extract_confirm_token(html).as_deref(), Some("TOK"));
    }

    // ── extract_confirm_token: inputs that must not produce a token ──────

    #[test]
    fn confirm_token_no_match_random_html() {
        let html = "<html><body><p>Hello world</p></body></html>";
        assert!(extract_confirm_token(html).is_none());
    }

    #[test]
    fn confirm_token_no_match_empty_string() {
        assert!(extract_confirm_token("").is_none());
    }

    #[test]
    fn confirm_token_no_match_similar_but_not_confirm() {
        // In `name="confirmed"` the text `confirm` is followed by `ed`, so
        // neither the `confirm=` search nor the `name="confirm"` search
        // matches anywhere in this input.
        let html = r#"<input name="confirmed" value="nope">"#;
        assert!(extract_confirm_token(html).is_none());
    }

    #[test]
    fn confirm_token_empty_token_returns_none() {
        // `confirm=` is immediately followed by `&`, so the token is empty
        // and no other pattern is present in the input.
        let html = "confirm=&rest";
        assert!(extract_confirm_token(html).is_none());
    }

    // ── GoogleDriveSource::can_handle ────────────────────────────────────

    #[test]
    fn can_handle_google_drive_directive() {
        let directive = DownloadDirective::GoogleDrive {
            id: "1AbCdEfGh".to_string(),
            hash: 42,
        };
        assert!(gdrive_source().can_handle(&directive));
    }

    #[test]
    fn can_handle_rejects_mega() {
        let directive = DownloadDirective::Mega {
            url: "https://mega.nz/file/X#Y".to_string(),
            hash: 0,
        };
        assert!(!gdrive_source().can_handle(&directive));
    }

    #[test]
    fn can_handle_rejects_nexus() {
        let directive = DownloadDirective::Nexus {
            game_id: GameId::from("skyrim"),
            mod_id: 1.into(),
            file_id: 1.into(),
            hash: 0,
        };
        assert!(!gdrive_source().can_handle(&directive));
    }

    #[test]
    fn can_handle_rejects_github() {
        let directive = DownloadDirective::GitHub {
            user: "u".to_string(),
            repo: "r".to_string(),
            tag: "t".to_string(),
            asset: "a".to_string(),
            hash: 0,
        };
        assert!(!gdrive_source().can_handle(&directive));
    }

    #[test]
    fn can_handle_rejects_direct_url() {
        let directive = DownloadDirective::DirectURL {
            url: "https://example.com/file".to_string(),
            headers: HashMap::new(),
            mirror_resolver: None,
            hash: 0,
        };
        assert!(!gdrive_source().can_handle(&directive));
    }
}