gdown-core 0.1.0

Core download logic for Google Drive
Documentation
//! Folder download logic for Google Drive

use crate::download::DownloadOptions;
use crate::download::Downloader;
use crate::error::{GdownError, Result};
use crate::url::parse_url;
use std::pin::Pin;
use std::path::PathBuf;

/// A single entry (file or subfolder) found in a Google Drive folder listing.
///
/// Derives equality and hashing so entries can be compared in tests and
/// collected into sets or used as map keys.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GoogleDriveFile {
    /// Drive identifier taken from the entry's link.
    pub id: String,
    /// Display name of the entry. May be empty: the listing parser in this
    /// module does not always know the name.
    pub name: String,
    /// `true` when the entry is a subfolder, `false` for a regular file.
    pub is_folder: bool,
    /// Size of the entry when known (presumably bytes — confirm with callers).
    pub size: Option<u64>,
}

/// Options applied to every file fetched during a folder download.
///
/// `Default` yields no speed limit and `resume = false`, so callers can write
/// `FolderDownloadOptions::default()` or use struct-update syntax.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct FolderDownloadOptions {
    /// Optional speed cap, forwarded unchanged to each per-file
    /// `DownloadOptions::speed_limit`. `None` leaves the limit unset.
    pub speed_limit: Option<u64>,
    /// Forwarded unchanged to each per-file `DownloadOptions::resume`.
    pub resume: bool,
}

/// Download an entire Google Drive folder, recursing into subfolders.
///
/// The folder's embedded listing page is fetched and parsed with
/// [`parse_folder_view`]. `output_dir` is created if needed, every file entry
/// is downloaded into it, and every subfolder entry is downloaded into a
/// child directory by calling this function again.
///
/// The future is boxed because the function awaits itself for subfolders.
///
/// # Returns
///
/// The direct entries of `folder_url` (files and subfolders). Entries found
/// inside subfolders are downloaded but are not part of the returned list.
///
/// # Errors
///
/// * [`GdownError::InvalidUrl`] when `folder_url` yields no folder id.
/// * [`GdownError::Download`] when the listing request or reading its body fails.
/// * Any error propagated from `parse_url`, [`parse_folder_view`], directory
///   creation, a per-file download, or a nested folder download. The first
///   failure aborts the whole operation.
pub fn download_folder<'a>(
    downloader: &'a Downloader,
    folder_url: &'a str,
    output_dir: PathBuf,
    options: FolderDownloadOptions,
) -> Pin<Box<dyn futures::Future<Output = Result<Vec<GoogleDriveFile>>> + 'a>> {
    Box::pin(async move {
        let (folder_id, _) = parse_url(folder_url)?;
        let folder_id =
            folder_id.ok_or_else(|| GdownError::InvalidUrl("No folder ID found".into()))?;

        let view_url = format!(
            "https://drive.google.com/embeddedfolderview?id={}#list",
            folder_id
        );

        let client = downloader.build_client();
        let response = client
            .get(&view_url)
            .send()
            .await
            .map_err(|e| GdownError::Download(e.to_string()))?;
        let html = response
            .text()
            .await
            .map_err(|e| GdownError::Download(e.to_string()))?;

        let files = parse_folder_view(&html)?;

        tokio::fs::create_dir_all(&output_dir).await?;

        let mut downloaded_files = Vec::with_capacity(files.len());

        for file in files {
            // Never join the raw entry name: it may be empty (which would
            // resolve to `output_dir` itself) or contain path separators.
            let target_path = output_dir.join(local_entry_name(&file));

            if file.is_folder {
                let subfolder_url =
                    format!("https://drive.google.com/drive/folders/{}", file.id);
                download_folder(downloader, &subfolder_url, target_path, options.clone()).await?;
            } else {
                let file_url = format!("https://drive.google.com/file/d/{}/view", file.id);

                let opts = DownloadOptions {
                    speed_limit: options.speed_limit,
                    resume: options.resume,
                    format: None,
                    progress_callback: None,
                };

                downloader.download(&file_url, &target_path, opts).await?;
            }
            downloaded_files.push(file);
        }

        Ok(downloaded_files)
    })
}

/// Choose the on-disk name for a listing entry: the final path component of
/// its display name, or the Drive id when the name is empty or has no usable
/// final component (for example `..`).
fn local_entry_name(entry: &GoogleDriveFile) -> &std::ffi::OsStr {
    std::path::Path::new(&entry.name)
        .file_name()
        .unwrap_or_else(|| std::ffi::OsStr::new(&entry.id))
}

/// Parse Google Drive embedded folder view HTML to extract the entry list.
///
/// Scans `html` for `/file/d/<id>` links (files) and `/drive/folders/<id>`
/// links (subfolders). The result lists all files first, then all folders,
/// each group in document order. An id that is linked more than once is
/// reported only once, at its first occurrence.
///
/// Every returned entry has an empty `name` and `size: None`; only the id and
/// the file/folder kind are taken from the markup.
///
/// # Errors
///
/// Currently always returns `Ok`; an input with no links yields an empty list.
pub fn parse_folder_view(html: &str) -> Result<Vec<GoogleDriveFile>> {
    use regex::Regex;
    use std::collections::HashSet;

    let file_links = Regex::new(r#"/file/d/([A-Za-z0-9_-]+)"#)
        .expect("hard-coded file link pattern is a valid regex");
    let folder_links = Regex::new(r#"/drive/folders/([A-Za-z0-9_-]+)"#)
        .expect("hard-coded folder link pattern is a valid regex");

    let mut files = Vec::new();
    // Ids borrow from `html`, so tracking duplicates needs no extra allocation.
    let mut seen_ids: HashSet<&str> = HashSet::new();

    let link_kinds = [(&file_links, false), (&folder_links, true)];
    for (link_regex, is_folder) in link_kinds.iter().copied() {
        for caps in link_regex.captures_iter(html) {
            // Group 1 is mandatory in both patterns, so it exists on every match.
            if let Some(id) = caps.get(1).map(|m| m.as_str()) {
                if seen_ids.insert(id) {
                    files.push(GoogleDriveFile {
                        id: id.to_string(),
                        name: String::new(),
                        is_folder,
                        size: None,
                    });
                }
            }
        }
    }

    // No separate `setShareableLink(...)` pass is needed: any such snippet
    // contains `/file/d/<id>`, which the file link pattern already matches.

    Ok(files)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Parse `html` and reduce every entry to an `(id, is_folder)` pair.
    fn parsed_entries(html: &str) -> Vec<(String, bool)> {
        parse_folder_view(html)
            .unwrap()
            .into_iter()
            .map(|entry| (entry.id, entry.is_folder))
            .collect()
    }

    /// Build the expected `(id, is_folder)` list from string literals.
    fn expected(pairs: &[(&str, bool)]) -> Vec<(String, bool)> {
        pairs
            .iter()
            .map(|&(id, is_folder)| (id.to_string(), is_folder))
            .collect()
    }

    #[test]
    fn test_parse_folder_view_file_links() {
        // Two anchors pointing at files, as they appear in the embedded view.
        let html = r#"
            <html>
            <body>
            <a href="/file/d/0B_NiLAzvehC9R2stRmQyM3ZiVjQ/view">Test File 1.txt</a>
            <a href="/file/d/0B9P1L--7Wd2vU3VUVlFnbTgtS2c/view">Test File 2.pdf</a>
            </body>
            </html>
        "#;
        assert_eq!(
            parsed_entries(html),
            expected(&[
                ("0B_NiLAzvehC9R2stRmQyM3ZiVjQ", false),
                ("0B9P1L--7Wd2vU3VUVlFnbTgtS2c", false),
            ])
        );
    }

    #[test]
    fn test_parse_folder_view_folder_links() {
        // A single anchor pointing at a subfolder.
        let html = r#"
            <html>
            <body>
            <a href="/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl">My Subfolder</a>
            </body>
            </html>
        "#;
        assert_eq!(
            parsed_entries(html),
            expected(&[("15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl", true)])
        );
    }

    #[test]
    fn test_parse_folder_view_mixed_content() {
        // Files and folders are interleaved in the markup, but the parser
        // reports every file before any folder: FILE1, FILE2, FOLDER1, FOLDER2.
        let html = r#"
            <html>
            <body>
            <a href="/file/d/FILE1/view">document1.txt</a>
            <a href="/drive/folders/FOLDER1">subfolder1</a>
            <a href="/file/d/FILE2/view">document2.pdf</a>
            <a href="/drive/folders/FOLDER2">subfolder2</a>
            </body>
            </html>
        "#;
        assert_eq!(
            parsed_entries(html),
            expected(&[
                ("FILE1", false),
                ("FILE2", false),
                ("FOLDER1", true),
                ("FOLDER2", true),
            ])
        );
    }

    #[test]
    fn test_parse_folder_view_empty() {
        // Markup without any Drive links produces an empty listing.
        let html = r#"<html><body><p>No files here</p></body></html>"#;
        assert!(parse_folder_view(html).unwrap().is_empty());
    }

    #[test]
    fn test_parse_folder_view_js_shareable_link() {
        // A share link embedded in a script still contains `/file/d/<id>`, so
        // it is picked up as an ordinary file link (no `/view` suffix needed).
        let html = r#"
            <html>
            <body>
            <script>
            setShareableLink('/file/d/1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480');
            </script>
            </body>
            </html>
        "#;
        let entries = parse_folder_view(html).unwrap();
        assert_eq!(entries.len(), 1);

        let only = &entries[0];
        assert_eq!(only.id, "1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480");
        assert!(!only.is_folder);
        // Entries found through a plain link carry no generated name.
        assert_eq!(only.name, "");
    }
}