Skip to main content

gdown_core/
url.rs

1//! URL parsing for Google Drive links
2//!
3//! Supports various Google Drive URL formats:
4//! - https://drive.google.com/uc?id=<id>
5//! - https://drive.google.com/file/d/<id>/view
6//! - https://drive.google.com/open?id=<id>
7//! - https://docs.google.com/document/d/<id>/edit
8//! - https://docs.google.com/spreadsheets/d/<id>/edit
9//! - https://docs.google.com/presentation/d/<id>/edit
10
11use crate::error::{GdownError, Result};
12use regex::Regex;
13
/// Google Drive file or folder ID.
///
/// Treated as an opaque string: the length varies, and besides letters and
/// digits an ID may contain `-` and `_`, so it must not be validated against a
/// fixed length or a purely alphanumeric alphabet.
pub type FileId = String;
16
17/// Parse a Google Drive URL and extract the file ID.
18///
19/// # Arguments
20///
21/// * `url` - A Google Drive URL string
22///
23/// # Returns
24///
25/// * `Ok((Some(file_id), is_download_link))` - Successfully parsed
26/// * `Ok((None, false))` - Not a Google Drive URL
27///
28/// # Examples
29///
30/// ```
31/// use gdown_core::parse_url;
32/// let (id, is_dl) = parse_url("https://drive.google.com/file/d/1l_5RK28JRL19wpT22B-DY9We3TVXnnQQ/view").unwrap();
33/// assert!(id.is_some());
34/// ```
35pub fn parse_url(url: &str) -> Result<(Option<FileId>, bool)> {
36    // Handle bare file IDs (just the ID, no URL)
37    if !url.contains("://") && !url.starts_with("http") {
38        return Ok((Some(url.trim().to_string()), false));
39    }
40
41    let parsed = url::Url::parse(url).map_err(GdownError::UrlError)?;
42
43    let host = parsed.host_str().unwrap_or_default();
44    let is_drive = host == "drive.google.com" || host == "docs.google.com";
45
46    if !is_drive {
47        return Ok((None, false));
48    }
49
50    let path = parsed.path();
51    let is_download_link = path == "/uc" || path.ends_with("/uc");
52
53    // Try to extract file_id from query string first
54    let query: std::collections::HashMap<String, String> = parsed
55        .query_pairs()
56        .map(|(k, v)| (k.to_string(), v.to_string()))
57        .collect();
58
59    if let Some(id) = query.get("id") {
60        return Ok((Some(id.clone()), is_download_link));
61    }
62
63    // Pattern 1: /file/d/<id>/(edit|view)
64    let re1 = Regex::new(r"^/file/d/([^/]+)/(edit|view)$").unwrap();
65    if let Some(caps) = re1.captures(path) {
66        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
67    }
68
69    // Pattern 2: /file/u/<digit>+/d/<id>/(edit|view)
70    let re2 = Regex::new(r"^/file/u/[0-9]+/d/([^/]+)/(edit|view)$").unwrap();
71    if let Some(caps) = re2.captures(path) {
72        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
73    }
74
75    // Pattern 3: /document/d/<id>/(edit|htmlview|view)
76    let re3 = Regex::new(r"^/document/d/([^/]+)/(edit|htmlview|view)$").unwrap();
77    if let Some(caps) = re3.captures(path) {
78        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
79    }
80
81    // Pattern 4: /document/u/<digit>+/d/<id>/(edit|htmlview|view)
82    let re4 = Regex::new(r"^/document/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
83    if let Some(caps) = re4.captures(path) {
84        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
85    }
86
87    // Pattern 5: /presentation/d/<id>/(edit|htmlview|view)
88    let re5 = Regex::new(r"^/presentation/d/([^/]+)/(edit|htmlview|view)$").unwrap();
89    if let Some(caps) = re5.captures(path) {
90        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
91    }
92
93    // Pattern 6: /presentation/u/<digit>+/d/<id>/(edit|htmlview|view)
94    let re6 = Regex::new(r"^/presentation/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
95    if let Some(caps) = re6.captures(path) {
96        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
97    }
98
99    // Pattern 7: /spreadsheets/d/<id>/(edit|htmlview|view)
100    let re7 = Regex::new(r"^/spreadsheets/d/([^/]+)/(edit|htmlview|view)$").unwrap();
101    if let Some(caps) = re7.captures(path) {
102        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
103    }
104
105    // Pattern 8: /spreadsheets/u/<digit>+/d/<id>/(edit|htmlview|view)
106    let re8 = Regex::new(r"^/spreadsheets/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
107    if let Some(caps) = re8.captures(path) {
108        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
109    }
110
111    // Pattern 9: /drive/folders/<id>
112    let re9 = Regex::new(r"^/drive/folders/([^/]+)").unwrap();
113    if let Some(caps) = re9.captures(path) {
114        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
115    }
116
117    // Pattern 10: /drive/folders/<id>/view (with view suffix)
118    let re10 = Regex::new(r"^/drive/folders/([^/]+)/view$").unwrap();
119    if let Some(caps) = re10.captures(path) {
120        return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
121    }
122
123    Ok((None, is_download_link))
124}
125
126/// Check if URL is a Google Drive URL
127pub fn is_google_drive_url(url: &str) -> bool {
128    if let Ok((id, _)) = parse_url(url) {
129        id.is_some()
130    } else {
131        false
132    }
133}
134
/// Return the `drive.google.com/uc` download link for `file_id`.
///
/// The ID is inserted verbatim; no percent-encoding is applied.
pub fn build_download_url(file_id: &str) -> String {
    format!("https://drive.google.com/uc?id={file_id}&export=download")
}
139
/// Return the Google Docs export link for `file_id` in the given `format`.
///
/// The link always targets the `docs.google.com/document/d/<id>/export`
/// endpoint; both arguments are inserted verbatim, without percent-encoding.
///
/// NOTE(review): the path is fixed to `/document/`, so IDs of Sheets or Slides
/// files presumably need a different endpoint — confirm before using this for
/// anything other than Docs documents.
pub fn build_export_url(file_id: &str, format: &str) -> String {
    format!("https://docs.google.com/document/d/{file_id}/export?format={format}")
}
147
#[cfg(test)]
mod tests {
    use super::*;

    // Real file IDs from gdown-main tests
    const REAL_FILE_ID: &str = "0B_NiLAzvehC9R2stRmQyM3ZiVjQ";
    const REAL_FILE_ID2: &str = "0B9P1L--7Wd2vU3VUVlFnbTgtS2c";
    const REAL_FOLDER_ID: &str = "15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl";
    const REAL_GOOGLE_DOC_ID: &str = "1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480";

    /// Parse `url` and assert both the extracted ID and the download-link flag.
    fn assert_parsed(url: &str, expected_id: &str, expected_is_dl: bool) {
        let (id, is_dl) = parse_url(url).unwrap();
        assert_eq!(id.as_deref(), Some(expected_id), "file ID for {}", url);
        assert_eq!(is_dl, expected_is_dl, "download-link flag for {}", url);
    }

    #[test]
    fn test_parse_google_open() {
        // https://drive.google.com/open?id=0B_NiLAzvehC9R2stRmQyM3ZiVjQ from test_parse_url.py
        let url = format!("https://drive.google.com/open?id={}", REAL_FILE_ID);
        assert_parsed(&url, REAL_FILE_ID, false);
    }

    #[test]
    fn test_parse_uc_download_link() {
        // https://drive.google.com/uc?id=0B_NiLAzvehC9R2stRmQyM3ZiVjQ from test_parse_url.py
        let url = format!("https://drive.google.com/uc?id={}", REAL_FILE_ID);
        assert_parsed(&url, REAL_FILE_ID, true);
    }

    #[test]
    fn test_parse_file_view_link() {
        // https://drive.google.com/file/d/0B9P1L--7Wd2vU3VUVlFnbTgtS2c/view?usp=sharing from test_download.py
        let url = format!("https://drive.google.com/file/d/{}/view?usp=sharing", REAL_FILE_ID2);
        assert_parsed(&url, REAL_FILE_ID2, false);
    }

    #[test]
    fn test_parse_subdomain_uc_link() {
        // https://drive.google.com/a/jsk.imi.i.u-tokyo.ac.jp/uc?id=0B_NiLAzvehC9R2stRmQyM3ZiVjQ&export=download
        let url = format!(
            "https://drive.google.com/a/jsk.imi.i.u-tokyo.ac.jp/uc?id={}&export=download",
            REAL_FILE_ID
        );
        assert_parsed(&url, REAL_FILE_ID, true);
    }

    #[test]
    fn test_parse_file_edit_link() {
        let url = format!("https://drive.google.com/file/d/{}/edit", REAL_FILE_ID);
        assert_parsed(&url, REAL_FILE_ID, false);
    }

    #[test]
    fn test_parse_open_link() {
        // Same link shape as test_parse_google_open, but with an extra query
        // parameter after `id` so the two tests are not duplicates.
        let url = format!("https://drive.google.com/open?id={}&usp=sharing", REAL_FILE_ID);
        assert_parsed(&url, REAL_FILE_ID, false);
    }

    #[test]
    fn test_parse_google_doc_edit() {
        // Real Google Docs URL
        let url = format!("https://docs.google.com/document/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_google_doc_view() {
        let url = format!("https://docs.google.com/document/d/{}/view", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_google_doc_htmlview() {
        let url = format!("https://docs.google.com/document/d/{}/htmlview", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_google_sheet_edit() {
        let url = format!("https://docs.google.com/spreadsheets/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_google_slides_edit() {
        // https://docs.google.com/presentation/d/1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480/edit?usp=drive_link
        let url = format!("https://docs.google.com/presentation/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_folder_link() {
        // https://drive.google.com/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl from README
        let url = format!("https://drive.google.com/drive/folders/{}", REAL_FOLDER_ID);
        assert_parsed(&url, REAL_FOLDER_ID, false);
    }

    #[test]
    fn test_parse_folder_link_with_view_suffix() {
        let url = format!("https://drive.google.com/drive/folders/{}/view", REAL_FOLDER_ID);
        assert_parsed(&url, REAL_FOLDER_ID, false);
    }

    #[test]
    fn test_parse_file_u_d_link() {
        let url = format!("https://drive.google.com/file/u/0/d/{}/view", REAL_FILE_ID);
        assert_parsed(&url, REAL_FILE_ID, false);
    }

    #[test]
    fn test_parse_document_u_d_link() {
        let url = format!("https://docs.google.com/document/u/0/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_presentation_u_d_link() {
        let url = format!("https://docs.google.com/presentation/u/0/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_spreadsheets_u_d_link() {
        let url = format!("https://docs.google.com/spreadsheets/u/1/d/{}/edit", REAL_GOOGLE_DOC_ID);
        assert_parsed(&url, REAL_GOOGLE_DOC_ID, false);
    }

    #[test]
    fn test_parse_non_gdrive_url() {
        // https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz from conftest.py
        let url = "https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz";
        let (id, is_dl) = parse_url(url).unwrap();
        assert!(id.is_none());
        assert!(!is_dl);
    }

    #[test]
    fn test_parse_drive_host_without_id() {
        // A Drive host whose path matches none of the known link shapes.
        let (id, is_dl) = parse_url("https://drive.google.com/drive/my-drive").unwrap();
        assert!(id.is_none());
        assert!(!is_dl);
    }

    #[test]
    fn test_parse_invalid_url_is_error() {
        assert!(parse_url("http://[:::1]").is_err());
    }

    #[test]
    fn test_parse_bare_id() {
        assert_parsed(REAL_FILE_ID, REAL_FILE_ID, false);
    }

    #[test]
    fn test_is_google_drive_url() {
        let drive_url = format!("https://drive.google.com/file/d/{}/view", REAL_FILE_ID);
        assert!(is_google_drive_url(&drive_url));
        assert!(is_google_drive_url(REAL_FILE_ID));
        assert!(!is_google_drive_url(
            "https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz"
        ));
        assert!(!is_google_drive_url("https://drive.google.com/drive/my-drive"));
    }

    #[test]
    fn test_build_download_url() {
        let url = build_download_url(REAL_FILE_ID);
        assert_eq!(
            url,
            format!("https://drive.google.com/uc?id={}&export=download", REAL_FILE_ID)
        );
    }

    #[test]
    fn test_build_export_url() {
        let url = build_export_url(REAL_FILE_ID, "pdf");
        assert_eq!(
            url,
            format!("https://docs.google.com/document/d/{}/export?format=pdf", REAL_FILE_ID)
        );
    }
}