Skip to main content

gdown_core/
folder.rs

1//! Folder download logic for Google Drive
2
3use crate::download::DownloadOptions;
4use crate::download::Downloader;
5use crate::error::{GdownError, Result};
6use crate::url::parse_url;
7use std::pin::Pin;
8use std::path::PathBuf;
9
/// Represents a file or folder in Google Drive.
///
/// Two values compare equal only when every field matches, which also makes the
/// type usable as a `HashSet`/`HashMap` key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GoogleDriveFile {
    /// Drive identifier of the entry, as it appears in its share link.
    pub id: String,
    /// Display name of the entry; may be empty when the listing did not yield one.
    pub name: String,
    /// `true` for a folder, `false` for a regular file.
    pub is_folder: bool,
    /// Size of the entry when known (presumably bytes — confirm against producers).
    pub size: Option<u64>,
}
18
/// Options for folder download.
///
/// The same options are applied to every file in the folder and are passed on
/// unchanged to nested subfolder downloads. `Default` yields no speed limit and
/// `resume` disabled.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FolderDownloadOptions {
    /// Optional transfer speed cap forwarded to each file download
    /// (unit is defined by the downloader — presumably bytes per second; confirm).
    pub speed_limit: Option<u64>,
    /// Forwarded to each file download as its `resume` flag.
    pub resume: bool,
}
25
26/// Download an entire Google Drive folder
27pub fn download_folder<'a>(
28    downloader: &'a Downloader,
29    folder_url: &'a str,
30    output_dir: PathBuf,
31    options: FolderDownloadOptions,
32) -> Pin<Box<dyn futures::Future<Output = Result<Vec<GoogleDriveFile>>> + 'a>> {
33    Box::pin(async move {
34        let (folder_id, _) = parse_url(folder_url)?;
35        let folder_id = folder_id.ok_or_else(|| GdownError::InvalidUrl("No folder ID found".into()))?;
36
37        let view_url = format!(
38            "https://drive.google.com/embeddedfolderview?id={}#list",
39            folder_id
40        );
41
42        let client = downloader.build_client();
43        let response = client.get(&view_url).send().await.map_err(|e| GdownError::Download(e.to_string()))?;
44        let html = response.text().await.map_err(|e| GdownError::Download(e.to_string()))?;
45
46        let files = parse_folder_view(&html)?;
47
48        tokio::fs::create_dir_all(&output_dir).await?;
49
50        let mut downloaded_files = Vec::new();
51
52        for file in files {
53            if file.is_folder {
54                let subfolder_path = output_dir.join(&file.name);
55                let subfolder_url = format!("https://drive.google.com/drive/folders/{}", file.id);
56                download_folder(downloader, &subfolder_url, subfolder_path, options.clone()).await?;
57            } else {
58                let file_url = format!("https://drive.google.com/file/d/{}/view", file.id);
59                let output_path = output_dir.join(&file.name);
60
61                let opts = DownloadOptions {
62                    speed_limit: options.speed_limit,
63                    resume: options.resume,
64                    format: None,
65                    progress_callback: None,
66                };
67
68                downloader.download(&file_url, &output_path, opts).await?;
69            }
70            downloaded_files.push(file);
71        }
72
73        Ok(downloaded_files)
74    })
75}
76
77/// Parse Google Drive embedded folder view HTML to extract file list
78pub fn parse_folder_view(html: &str) -> Result<Vec<GoogleDriveFile>> {
79    use regex::Regex;
80
81    let mut files = Vec::new();
82
83    let file_regex = Regex::new(r#"/file/d/([A-Za-z0-9_-]+)"#).unwrap();
84    for caps in file_regex.captures_iter(html) {
85        let id = caps.get(1).unwrap().as_str().to_string();
86        files.push(GoogleDriveFile {
87            id,
88            name: String::new(),
89            is_folder: false,
90            size: None,
91        });
92    }
93
94    let folder_regex = Regex::new(r#"/drive/folders/([A-Za-z0-9_-]+)"#).unwrap();
95    for caps in folder_regex.captures_iter(html) {
96        let id = caps.get(1).unwrap().as_str().to_string();
97        files.push(GoogleDriveFile {
98            id,
99            name: String::new(),
100            is_folder: true,
101            size: None,
102        });
103    }
104
105    if files.is_empty() {
106        let js_file_regex = Regex::new(r#"setShareableLink.*?/file/d/([A-Za-z0-9_-]+)"#).unwrap();
107        for caps in js_file_regex.captures_iter(html) {
108            let id = caps.get(1).unwrap().as_str().to_string();
109            files.push(GoogleDriveFile {
110                id: id.clone(),
111                name: format!("file_{}", &id[..8]),
112                is_folder: false,
113                size: None,
114            });
115        }
116    }
117
118    Ok(files)
119}
120
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the parser on `html`, panicking if it reports an error.
    fn listing(html: &str) -> Vec<GoogleDriveFile> {
        parse_folder_view(html).unwrap()
    }

    /// Asserts that `found` consists of exactly the given `(id, is_folder)` pairs, in order.
    fn assert_listing(found: &[GoogleDriveFile], expected: &[(&str, bool)]) {
        assert_eq!(found.len(), expected.len());
        for (entry, (id, is_folder)) in found.iter().zip(expected) {
            assert_eq!(entry.id, *id);
            assert_eq!(entry.is_folder, *is_folder);
        }
    }

    #[test]
    fn test_parse_folder_view_file_links() {
        // Two plain file anchors, as they appear in the embedded folder view.
        let html = r#"
            <html>
            <body>
            <a href="/file/d/0B_NiLAzvehC9R2stRmQyM3ZiVjQ/view">Test File 1.txt</a>
            <a href="/file/d/0B9P1L--7Wd2vU3VUVlFnbTgtS2c/view">Test File 2.pdf</a>
            </body>
            </html>
        "#;
        assert_listing(
            &listing(html),
            &[
                ("0B_NiLAzvehC9R2stRmQyM3ZiVjQ", false),
                ("0B9P1L--7Wd2vU3VUVlFnbTgtS2c", false),
            ],
        );
    }

    #[test]
    fn test_parse_folder_view_folder_links() {
        // A single subfolder anchor.
        let html = r#"
            <html>
            <body>
            <a href="/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl">My Subfolder</a>
            </body>
            </html>
        "#;
        assert_listing(
            &listing(html),
            &[("15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl", true)],
        );
    }

    #[test]
    fn test_parse_folder_view_mixed_content() {
        // Files and folders are interleaved in the document, but the parser gathers
        // every file before any folder, so the expected order is grouped by kind.
        let html = r#"
            <html>
            <body>
            <a href="/file/d/FILE1/view">document1.txt</a>
            <a href="/drive/folders/FOLDER1">subfolder1</a>
            <a href="/file/d/FILE2/view">document2.pdf</a>
            <a href="/drive/folders/FOLDER2">subfolder2</a>
            </body>
            </html>
        "#;
        assert_listing(
            &listing(html),
            &[
                ("FILE1", false),
                ("FILE2", false),
                ("FOLDER1", true),
                ("FOLDER2", true),
            ],
        );
    }

    #[test]
    fn test_parse_folder_view_empty() {
        // No links at all: parsing succeeds and yields nothing.
        let html = r#"<html><body><p>No files here</p></body></html>"#;
        assert!(listing(html).is_empty());
    }

    #[test]
    fn test_parse_folder_view_js_shareable_link() {
        // The generic `/file/d/<id>` pattern needs no `/view` suffix, so it also picks
        // up an ID that only appears inside an inline script call.
        let html = r#"
            <html>
            <body>
            <script>
            setShareableLink('/file/d/1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480');
            </script>
            </body>
            </html>
        "#;
        let found = listing(html);
        assert_listing(
            &found,
            &[("1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480", false)],
        );
        // Entries found this way carry no generated name.
        assert_eq!(found[0].name, "");
    }
}