use crate::error::{GdownError, Result};
use crate::url::{parse_url, build_download_url, FileId};
use futures_util::stream::StreamExt;
use reqwest::Client;
use std::path::{Path, PathBuf};
use std::time::Duration;
/// User-Agent header value that `Downloader::new` installs by default
/// (a desktop Chrome 120 browser signature).
const DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
/// Per-download settings handed to `Downloader::download`.
///
/// `Clone` and `Debug` are implemented by hand for this type because the
/// boxed progress callback supports neither.
pub struct DownloadOptions {
/// Throughput cap; `download_file` interprets the value as bytes per second.
/// `None` disables throttling.
pub speed_limit: Option<u64>,
/// When `true` and the output file already exists, `download` continues it
/// with a ranged request instead of starting from scratch.
pub resume: bool,
/// Export format, sent as the `format=` query value when the URL refers to a
/// Google document, spreadsheet or presentation.
pub format: Option<String>,
/// Invoked after each chunk is written with
/// `(bytes_written_so_far, total_bytes_if_known)`.
// The boxed closure type is spelled out inline, which trips clippy's
// `type_complexity` lint; the allow keeps the signature visible at the field.
#[allow(clippy::type_complexity)]
pub progress_callback: Option<Box<dyn Fn(u64, Option<u64>) + Send + 'static>>,
}
impl Clone for DownloadOptions {
    /// Copies the plain settings.
    ///
    /// The boxed progress callback cannot be cloned, so the copy always
    /// starts without one.
    fn clone(&self) -> Self {
        let Self {
            speed_limit,
            resume,
            format,
            progress_callback: _,
        } = self;
        Self {
            speed_limit: *speed_limit,
            resume: *resume,
            format: format.clone(),
            progress_callback: None,
        }
    }
}
impl std::fmt::Debug for DownloadOptions {
    /// Formats every field; the callback is opaque, so a fixed `"..."`
    /// placeholder is printed in its place.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let mut out = f.debug_struct("DownloadOptions");
        out.field("speed_limit", &self.speed_limit);
        out.field("resume", &self.resume);
        out.field("format", &self.format);
        out.field("progress_callback", &"...");
        out.finish()
    }
}
/// Configurable downloader; each setting is applied through a consuming
/// builder-style method (`proxy`, `user_agent`, `verify_ssl`, `cookies_path`).
#[derive(Clone)]
pub struct Downloader {
/// Proxy URL passed to `reqwest::Proxy` by `build_client`; `None` means no
/// proxy is configured explicitly.
proxy: Option<String>,
/// Value sent as the `User-Agent` header by clients from `build_client`.
user_agent: String,
/// When `false`, `build_client` accepts invalid TLS certificates.
verify_ssl: bool,
/// Path of the cookie file.
// NOTE(review): no method in this impl reads this field, and the default
// value begins with a literal `~`, which `PathBuf` does not expand —
// confirm how (and where) callers resolve it.
cookies_path: PathBuf,
}
impl Downloader {
/// Creates a downloader with the defaults: no proxy, the built-in browser
/// User-Agent, TLS verification enabled and the stock cookie-file path.
pub fn new() -> Self {
    let user_agent = String::from(DEFAULT_USER_AGENT);
    let cookies_path = PathBuf::from("~/.cache/gdown/cookies.txt");
    Self {
        proxy: None,
        user_agent,
        verify_ssl: true,
        cookies_path,
    }
}
pub fn proxy(mut self, proxy: &str) -> Self {
self.proxy = Some(proxy.to_string());
self
}
pub fn user_agent(mut self, ua: &str) -> Self {
self.user_agent = ua.to_string();
self
}
pub fn verify_ssl(mut self, verify: bool) -> Self {
self.verify_ssl = verify;
self
}
pub fn cookies_path(mut self, path: &Path) -> Self {
self.cookies_path = path.to_path_buf();
self
}
/// Builds a `reqwest::Client` reflecting this downloader's configuration.
///
/// The client sends the configured User-Agent, waits at most 60 seconds for
/// a connection to be established, optionally accepts invalid certificates
/// (`verify_ssl == false`) and routes *all* traffic through the configured
/// proxy, if any.
///
/// If the configured builder fails, a minimal client carrying only the
/// User-Agent is returned as a best-effort fallback.
///
/// # Panics
///
/// Panics when the configured proxy URL cannot be parsed, or when even the
/// fallback client cannot be constructed.
pub fn build_client(&self) -> Client {
    let mut builder = Client::builder()
        .user_agent(&self.user_agent)
        // `timeout` is a deadline for the whole request *including* the
        // body, which would abort any streamed download lasting longer than
        // a minute. Only the connection phase is bounded here.
        .connect_timeout(Duration::from_secs(60));
    if !self.verify_ssl {
        builder = builder.danger_accept_invalid_certs(true);
    }
    if let Some(proxy_url) = &self.proxy {
        // `Proxy::http` intercepts plain-HTTP requests only; every URL this
        // downloader fetches is HTTPS, so such a proxy would be bypassed
        // silently. `Proxy::all` covers HTTP, HTTPS and SOCKS proxies alike.
        let proxy = reqwest::Proxy::all(proxy_url.as_str())
            .unwrap_or_else(|e| panic!("invalid proxy URL {:?}: {}", proxy_url, e));
        builder = builder.proxy(proxy);
    }
    builder.build().unwrap_or_else(|_| {
        Client::builder()
            .user_agent(&self.user_agent)
            .build()
            .expect("Failed to create HTTP client")
    })
}
pub async fn download(
&self,
url: &str,
output: &Path,
options: DownloadOptions,
) -> Result<u64> {
let client = self.build_client();
let (file_id, is_download_link) = parse_url(url)?;
let file_id = file_id.ok_or_else(|| GdownError::InvalidUrl("No file ID found".into()))?;
let request_url = if is_download_link {
build_download_url(&file_id)
} else {
if let Some(format) = options.format.clone() {
if url.contains("document") {
return self.download_doc_export(&file_id, &format, output, options).await;
} else if url.contains("spreadsheet") {
return self.download_sheet_export(&file_id, &format, output, options).await;
} else if url.contains("presentation") {
return self.download_slides_export(&file_id, &format, output, options).await;
}
}
build_download_url(&file_id)
};
let response = client.get(&request_url).send().await.map_err(|e| GdownError::Download(e.to_string()))?;
let content_type = response
.headers()
.get("Content-Type")
.and_then(|v| v.to_str().ok())
.unwrap_or("");
if content_type.contains("text/html") {
let html = response.text().await.map_err(|e| GdownError::Download(e.to_string()))?;
let actual_url = self.extract_confirmation_url(&html).await?;
if options.resume && output.exists() {
return self.resume_download(&actual_url, output, options).await;
}
return self.download_file(&actual_url, output, options).await;
}
if options.resume && output.exists() {
return self.resume_download(&request_url, output, options).await;
}
self.download_file(&request_url, output, options).await
}
/// Downloads a Google Docs document exported as `format` into `output`.
async fn download_doc_export(
    &self,
    file_id: &FileId,
    format: &str,
    output: &Path,
    options: DownloadOptions,
) -> Result<u64> {
    let export_url = format!("https://docs.google.com/document/d/{}/export?format={}", file_id, format);
    let bytes_written = self.download_file(&export_url, output, options).await;
    bytes_written
}
/// Downloads a Google Sheets spreadsheet exported as `format` into `output`.
async fn download_sheet_export(
    &self,
    file_id: &FileId,
    format: &str,
    output: &Path,
    options: DownloadOptions,
) -> Result<u64> {
    let export_url = format!("https://docs.google.com/spreadsheets/d/{}/export?format={}", file_id, format);
    let bytes_written = self.download_file(&export_url, output, options).await;
    bytes_written
}
/// Downloads a Google Slides presentation exported as `format` into `output`.
async fn download_slides_export(
    &self,
    file_id: &FileId,
    format: &str,
    output: &Path,
    options: DownloadOptions,
) -> Result<u64> {
    let export_url = format!("https://docs.google.com/presentation/d/{}/export?format={}", file_id, format);
    let bytes_written = self.download_file(&export_url, output, options).await;
    bytes_written
}
/// Extracts the real download URL from a confirmation HTML page.
///
/// Two page shapes are recognised, in this order:
/// 1. The first `action="…"` attribute. When it mentions `confirm`, the
///    action is POSTed (with the `confirm` token, if one is found) and a
///    readable `Location` response header wins; otherwise the action itself
///    is returned.
/// 2. A `downloadUrl: "…"` entry embedded in the page.
///
/// Both values are un-escaped before use: HTML attribute values carry `&`
/// as `&amp;`, and the embedded `downloadUrl` string carries `=` / `&` as
/// `\u003d` / `\u0026`. Using them verbatim yields a URL with broken query
/// parameters.
///
/// # Errors
///
/// Returns `GdownError::Download` when the confirmation POST fails and
/// `GdownError::FileUrlRetrieval` when the page matches neither shape.
async fn extract_confirmation_url(&self, html: &str) -> Result<String> {
    use regex::Regex;
    let form_regex = Regex::new(r#"action="([^"]+)""#).expect("static regex is valid");
    if let Some(caps) = form_regex.captures(html) {
        // Group 1 always participates in a successful match of this pattern.
        let action = caps[1].replace("&amp;", "&");
        if action.contains("confirm") {
            let client = self.build_client();
            let token_regex = Regex::new(r#"name="confirm".*?value="([^"]+)""#)
                .expect("static regex is valid");
            let token = token_regex
                .captures(html)
                .and_then(|c| c.get(1))
                .map(|m| m.as_str());
            let mut request = client.post(action.as_str());
            if let Some(t) = token {
                request = request.form(&[("confirm", t)]);
            }
            let response = request
                .send()
                .await
                .map_err(|e| GdownError::Download(e.to_string()))?;
            // An unreadable (non-ASCII) Location header falls back to the
            // form action below.
            let location = response
                .headers()
                .get("Location")
                .and_then(|value| value.to_str().ok());
            if let Some(location) = location {
                return Ok(location.to_string());
            }
        }
        return Ok(action);
    }
    let download_url_regex =
        Regex::new(r#"downloadUrl\s*:\s*"([^"]+)""#).expect("static regex is valid");
    if let Some(caps) = download_url_regex.captures(html) {
        // The value is a JSON string literal; undo the escapes used for the
        // query-string delimiters.
        let url = caps[1].replace("\\u003d", "=").replace("\\u0026", "&");
        return Ok(url);
    }
    Err(GdownError::FileUrlRetrieval("Could not find download URL in confirmation page".into()))
}
async fn download_file(
&self,
url: &str,
output: &Path,
options: DownloadOptions,
) -> Result<u64> {
use tokio::io::AsyncWriteExt;
let client = self.build_client();
let response = client.get(url).send().await.map_err(|e| GdownError::Download(e.to_string()))?;
let total_size = response.content_length();
let mut file = tokio::fs::File::create(output).await?;
let mut downloaded: u64 = 0;
let mut stream = response.bytes_stream();
while let Some(chunk_result) = stream.next().await {
let chunk = chunk_result.map_err(|e| GdownError::Download(e.to_string()))?;
file.write_all(&chunk).await?;
downloaded += chunk.len() as u64;
if let Some(ref cb) = options.progress_callback {
cb(downloaded, total_size);
}
if let Some(limit) = options.speed_limit {
let expected_time = (downloaded as f64 / limit as f64 * 1000.0) as u64;
tokio::time::sleep(std::time::Duration::from_millis(expected_time)).await;
}
}
Ok(downloaded)
}
async fn resume_download(
&self,
url: &str,
output: &Path,
options: DownloadOptions,
) -> Result<u64> {
use tokio::io::AsyncWriteExt;
let existing_size = tokio::fs::metadata(output).await?.len();
let client = self.build_client();
let response = client
.get(url)
.header("Range", format!("bytes={}-", existing_size))
.send()
.await.map_err(|e| GdownError::Download(e.to_string()))?;
let mut file = tokio::fs::OpenOptions::new()
.append(true)
.open(output)
.await?;
let mut downloaded = existing_size;
let mut stream = response.bytes_stream();
while let Some(chunk_result) = stream.next().await {
let chunk = chunk_result.map_err(|e| GdownError::Download(e.to_string()))?;
file.write_all(&chunk).await?;
downloaded += chunk.len() as u64;
if let Some(ref cb) = options.progress_callback {
cb(downloaded, None);
}
}
Ok(downloaded)
}
/// Pulls the file name out of a `Content-Disposition` header value.
///
/// The extended form `filename*=UTF-8''<value>` takes precedence; its value
/// runs up to the next `;` (or the end of the header) and is returned as-is,
/// without percent-decoding. Otherwise the quoted form `filename="<value>"`
/// is used. Unquoted or unterminated values yield `None`.
pub fn get_filename_from_disposition(disposition: &str) -> Option<String> {
    if let Some((_, extended)) = disposition.split_once("filename*=UTF-8''") {
        let value = extended.split(';').next().unwrap_or(extended);
        return Some(value.to_owned());
    }
    let (_, quoted) = disposition.split_once("filename=\"")?;
    let (name, _) = quoted.split_once('"')?;
    Some(name.to_owned())
}
}
impl Default for Downloader {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the header parser under test.
    fn parse(header: &str) -> Option<String> {
        Downloader::get_filename_from_disposition(header)
    }

    /// A fresh downloader verifies TLS certificates.
    #[test]
    fn test_downloader_creation() {
        assert!(Downloader::new().verify_ssl);
    }

    /// The extended `filename*` form takes precedence over the quoted form.
    #[test]
    fn test_filename_from_disposition() {
        let header = r#"attachment; filename="test.txt"; filename*=UTF-8''test%20file.txt"#;
        assert_eq!(parse(header).as_deref(), Some("test%20file.txt"));
    }

    /// A lone quoted `filename` is returned without the quotes.
    #[test]
    fn test_filename_simple() {
        let header = r#"attachment; filename="test.txt""#;
        assert_eq!(parse(header).as_deref(), Some("test.txt"));
    }

    /// A header without any file name yields `None`.
    #[test]
    fn test_filename_from_disposition_empty() {
        let header = r#"attachment"#;
        assert_eq!(parse(header), None);
    }

    /// The extended form alone is returned still percent-encoded.
    #[test]
    fn test_filename_from_disposition_only_filename_star() {
        let header = r#"attachment; filename*=UTF-8''test%20file.txt"#;
        assert_eq!(parse(header).as_deref(), Some("test%20file.txt"));
    }

    /// Spaces inside a quoted file name are preserved.
    #[test]
    fn test_filename_from_disposition_with_spaces() {
        let header = r#"attachment; filename="test file with spaces.txt""#;
        assert_eq!(parse(header).as_deref(), Some("test file with spaces.txt"));
    }

    /// An unquoted `filename=` value is not recognised.
    #[test]
    fn test_filename_from_disposition_no_quotes() {
        let header = r#"attachment; filename=test.txt"#;
        assert_eq!(parse(header), None);
    }

    /// Percent-encoded multi-byte characters are passed through untouched.
    #[test]
    fn test_filename_from_disposition_rfc5987_chars() {
        let header = r#"attachment; filename*=UTF-8''%E6%96%87%E4%BB%B6.txt"#;
        assert_eq!(parse(header).as_deref(), Some("%E6%96%87%E4%BB%B6.txt"));
    }
}