use crate::download::DownloadOptions;
use crate::download::Downloader;
use crate::error::{GdownError, Result};
use crate::url::parse_url;
use std::pin::Pin;
use std::path::PathBuf;
#[derive(Debug, Clone)]
/// A single entry (file or subfolder) discovered in a Google Drive folder listing.
pub struct GoogleDriveFile {
/// Drive ID captured from the entry's link.
pub id: String,
/// Entry name. May be empty: `parse_folder_view` does not extract names for
/// entries found through plain `/file/d/` or `/drive/folders/` links.
pub name: String,
/// `true` when the entry was found through a `/drive/folders/` link.
pub is_folder: bool,
/// Size of the entry when known; `parse_folder_view` always leaves this as `None`.
/// NOTE(review): presumably bytes — confirm against whoever populates it.
pub size: Option<u64>,
}
#[derive(Debug, Clone)]
/// Options applied to every file downloaded by `download_folder`.
pub struct FolderDownloadOptions {
/// Copied into `DownloadOptions::speed_limit` for each file.
/// NOTE(review): presumably a bandwidth cap; units are not visible here — confirm in `Downloader`.
pub speed_limit: Option<u64>,
/// Copied into `DownloadOptions::resume` for each file.
/// NOTE(review): presumably enables resuming partial downloads — confirm in `Downloader`.
pub resume: bool,
}
/// Recursively downloads the contents of a Google Drive folder into `output_dir`.
///
/// The folder ID is extracted from `folder_url` with `parse_url`, the folder's
/// `embeddedfolderview` page is fetched with the client returned by
/// `downloader.build_client()`, and the entries found by [`parse_folder_view`]
/// are processed in order:
///
/// * folders are downloaded recursively into a subdirectory of `output_dir`;
/// * files are downloaded with `downloader.download`, using the speed limit
///   and resume settings from `options`.
///
/// Entries whose name is empty are stored under their Drive ID, so every entry
/// gets a distinct path inside `output_dir`.
///
/// The future is boxed because the function awaits itself for subfolders.
///
/// # Returns
///
/// The direct entries of this folder, in processing order. Entries found
/// inside subfolders are downloaded but are not part of the returned list;
/// only the subfolder entry itself is.
///
/// # Errors
///
/// * `GdownError::InvalidUrl` when `folder_url` yields no folder ID.
/// * `GdownError::Download` when the listing request or reading its body fails.
/// * Any error propagated from `parse_url`, `parse_folder_view`, directory
///   creation, a nested folder download, or `downloader.download`. The first
///   error aborts the whole operation.
pub fn download_folder<'a>(
    downloader: &'a Downloader,
    folder_url: &'a str,
    output_dir: PathBuf,
    options: FolderDownloadOptions,
) -> Pin<Box<dyn futures::Future<Output = Result<Vec<GoogleDriveFile>>> + 'a>> {
    Box::pin(async move {
        let (folder_id, _) = parse_url(folder_url)?;
        let folder_id =
            folder_id.ok_or_else(|| GdownError::InvalidUrl("No folder ID found".into()))?;
        let view_url = format!(
            "https://drive.google.com/embeddedfolderview?id={}#list",
            folder_id
        );

        let client = downloader.build_client();
        let response = client
            .get(&view_url)
            .send()
            .await
            .map_err(|e| GdownError::Download(e.to_string()))?;
        let html = response
            .text()
            .await
            .map_err(|e| GdownError::Download(e.to_string()))?;
        let files = parse_folder_view(&html)?;

        tokio::fs::create_dir_all(&output_dir).await?;

        let mut downloaded_files = Vec::new();
        for file in files {
            // The listing parser can yield entries with an empty name. Joining an
            // empty component returns `output_dir` itself, which would point file
            // downloads at the directory and flatten subfolders into their parent.
            // Fall back to the Drive ID so each entry gets its own path.
            let entry_name = if file.name.is_empty() {
                &file.id
            } else {
                &file.name
            };
            let entry_path = output_dir.join(entry_name);

            if file.is_folder {
                let subfolder_url =
                    format!("https://drive.google.com/drive/folders/{}", file.id);
                // The nested result is intentionally dropped: only this folder's
                // direct entries are reported to the caller.
                download_folder(downloader, &subfolder_url, entry_path, options.clone()).await?;
            } else {
                let file_url = format!("https://drive.google.com/file/d/{}/view", file.id);
                let opts = DownloadOptions {
                    speed_limit: options.speed_limit,
                    resume: options.resume,
                    format: None,
                    progress_callback: None,
                };
                downloader.download(&file_url, &entry_path, opts).await?;
            }
            downloaded_files.push(file);
        }
        Ok(downloaded_files)
    })
}
/// Extracts Google Drive file and folder entries from a folder view page.
///
/// The HTML is scanned for `/file/d/<id>` links (files) and then for
/// `/drive/folders/<id>` links (folders). All files are therefore listed before
/// all folders, each group in document order. An ID that is linked more than
/// once is reported only once, at its first occurrence, so callers do not
/// process the same entry repeatedly.
///
/// Entries found this way carry an empty `name` and `size: None`; this function
/// does not read names or sizes from the page.
///
/// # Errors
///
/// Currently always returns `Ok`; a page without recognisable links yields an
/// empty vector.
///
/// # Panics
///
/// Panics only if one of the hard-coded patterns fails to compile, which would
/// be a programming error.
pub fn parse_folder_view(html: &str) -> Result<Vec<GoogleDriveFile>> {
    use regex::Regex;
    use std::collections::HashSet;

    let mut files = Vec::new();
    // (is_folder, id) pairs already emitted; the `&str` borrows from `html`.
    let mut seen: HashSet<(bool, &str)> = HashSet::new();

    let file_regex =
        Regex::new(r#"/file/d/([A-Za-z0-9_-]+)"#).expect("file link pattern is a valid regex");
    for m in file_regex.captures_iter(html).filter_map(|caps| caps.get(1)) {
        let id = m.as_str();
        if seen.insert((false, id)) {
            files.push(GoogleDriveFile {
                id: id.to_string(),
                name: String::new(),
                is_folder: false,
                size: None,
            });
        }
    }

    let folder_regex = Regex::new(r#"/drive/folders/([A-Za-z0-9_-]+)"#)
        .expect("folder link pattern is a valid regex");
    for m in folder_regex.captures_iter(html).filter_map(|caps| caps.get(1)) {
        let id = m.as_str();
        if seen.insert((true, id)) {
            files.push(GoogleDriveFile {
                id: id.to_string(),
                name: String::new(),
                is_folder: true,
                size: None,
            });
        }
    }

    // NOTE(review): every match of the pattern below also contains a match of
    // the plain `/file/d/` pattern above, so this fallback looks unreachable as
    // written. It is kept as a guard in case the primary patterns are narrowed.
    if files.is_empty() {
        let js_file_regex = Regex::new(r#"setShareableLink.*?/file/d/([A-Za-z0-9_-]+)"#)
            .expect("shareable link pattern is a valid regex");
        for m in js_file_regex.captures_iter(html).filter_map(|caps| caps.get(1)) {
            let id = m.as_str();
            if seen.insert((false, id)) {
                // IDs may be shorter than 8 bytes; a checked slice avoids the
                // out-of-range panic of `&id[..8]`.
                let prefix = id.get(..8).unwrap_or(id);
                files.push(GoogleDriveFile {
                    id: id.to_string(),
                    name: format!("file_{}", prefix),
                    is_folder: false,
                    size: None,
                });
            }
        }
    }

    Ok(files)
}
// Unit tests for `parse_folder_view`, driven by small inline HTML fixtures.
#[cfg(test)]
mod tests {
use super::*;
// Plain `/file/d/<id>` anchors are reported as files, in document order.
#[test]
fn test_parse_folder_view_file_links() {
let html = r#"
<html>
<body>
<a href="/file/d/0B_NiLAzvehC9R2stRmQyM3ZiVjQ/view">Test File 1.txt</a>
<a href="/file/d/0B9P1L--7Wd2vU3VUVlFnbTgtS2c/view">Test File 2.pdf</a>
</body>
</html>
"#;
let files = parse_folder_view(html).unwrap();
assert_eq!(files.len(), 2);
assert_eq!(files[0].id, "0B_NiLAzvehC9R2stRmQyM3ZiVjQ");
assert!(!files[0].is_folder);
assert_eq!(files[1].id, "0B9P1L--7Wd2vU3VUVlFnbTgtS2c");
assert!(!files[1].is_folder);
}
// A `/drive/folders/<id>` anchor is reported as a folder entry.
#[test]
fn test_parse_folder_view_folder_links() {
let html = r#"
<html>
<body>
<a href="/drive/folders/15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl">My Subfolder</a>
</body>
</html>
"#;
let files = parse_folder_view(html).unwrap();
assert_eq!(files.len(), 1);
assert_eq!(files[0].id, "15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl");
assert!(files[0].is_folder);
}
// With interleaved links, all files come first and all folders second,
// because the parser scans for file links before folder links.
#[test]
fn test_parse_folder_view_mixed_content() {
let html = r#"
<html>
<body>
<a href="/file/d/FILE1/view">document1.txt</a>
<a href="/drive/folders/FOLDER1">subfolder1</a>
<a href="/file/d/FILE2/view">document2.pdf</a>
<a href="/drive/folders/FOLDER2">subfolder2</a>
</body>
</html>
"#;
let files = parse_folder_view(html).unwrap();
assert_eq!(files.len(), 4);
assert_eq!(files[0].id, "FILE1");
assert!(!files[0].is_folder);
assert_eq!(files[1].id, "FILE2");
assert!(!files[1].is_folder);
assert_eq!(files[2].id, "FOLDER1");
assert!(files[2].is_folder);
assert_eq!(files[3].id, "FOLDER2");
assert!(files[3].is_folder);
}
// A page without any recognisable link yields an empty (but `Ok`) result.
#[test]
fn test_parse_folder_view_empty() {
let html = r#"<html><body><p>No files here</p></body></html>"#;
let files = parse_folder_view(html).unwrap();
assert!(files.is_empty());
}
// A link that only appears inside a `setShareableLink(...)` script call is still found.
#[test]
fn test_parse_folder_view_js_shareable_link() {
let html = r#"
<html>
<body>
<script>
setShareableLink('/file/d/1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480');
</script>
</body>
</html>
"#;
let files = parse_folder_view(html).unwrap();
assert_eq!(files.len(), 1);
assert_eq!(files[0].id, "1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480");
assert!(!files[0].is_folder);
// The generic `/file/d/` pattern already matches this link, so the entry
// gets an empty name rather than a `file_<prefix>` fallback name.
assert_eq!(files[0].name, "");
}
}