use crate::error::{GdownError, Result};
use regex::Regex;
/// Identifier of a Google Drive file or folder, as produced by `parse_url`.
pub type FileId = String;
pub fn parse_url(url: &str) -> Result<(Option<FileId>, bool)> {
if !url.contains("://") && !url.starts_with("http") {
return Ok((Some(url.trim().to_string()), false));
}
let parsed = url::Url::parse(url).map_err(GdownError::UrlError)?;
let host = parsed.host_str().unwrap_or_default();
let is_drive = host == "drive.google.com" || host == "docs.google.com";
if !is_drive {
return Ok((None, false));
}
let path = parsed.path();
let is_download_link = path == "/uc" || path.ends_with("/uc");
let query: std::collections::HashMap<String, String> = parsed
.query_pairs()
.map(|(k, v)| (k.to_string(), v.to_string()))
.collect();
if let Some(id) = query.get("id") {
return Ok((Some(id.clone()), is_download_link));
}
let re1 = Regex::new(r"^/file/d/([^/]+)/(edit|view)$").unwrap();
if let Some(caps) = re1.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re2 = Regex::new(r"^/file/u/[0-9]+/d/([^/]+)/(edit|view)$").unwrap();
if let Some(caps) = re2.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re3 = Regex::new(r"^/document/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re3.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re4 = Regex::new(r"^/document/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re4.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re5 = Regex::new(r"^/presentation/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re5.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re6 = Regex::new(r"^/presentation/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re6.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re7 = Regex::new(r"^/spreadsheets/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re7.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re8 = Regex::new(r"^/spreadsheets/u/[0-9]+/d/([^/]+)/(edit|htmlview|view)$").unwrap();
if let Some(caps) = re8.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re9 = Regex::new(r"^/drive/folders/([^/]+)").unwrap();
if let Some(caps) = re9.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
let re10 = Regex::new(r"^/drive/folders/([^/]+)/view$").unwrap();
if let Some(caps) = re10.captures(path) {
return Ok((Some(caps.get(1).unwrap().as_str().to_string()), false));
}
Ok((None, is_download_link))
}
/// Reports whether `url` resolves to a Google Drive ID.
///
/// This is `true` exactly when `parse_url` succeeds and yields an ID; parse
/// failures and inputs without an ID both give `false`.
pub fn is_google_drive_url(url: &str) -> bool {
    matches!(parse_url(url), Ok((Some(_), _)))
}
/// Builds the `drive.google.com/uc` download URL for `file_id`.
///
/// The ID is inserted verbatim; no escaping is applied.
pub fn build_download_url(file_id: &str) -> String {
    ["https://drive.google.com/uc?id=", file_id, "&export=download"].concat()
}
/// Builds the `docs.google.com/document/d/<id>/export` URL for `file_id`,
/// requesting the given export `format`.
///
/// Both arguments are inserted verbatim; no escaping is applied.
pub fn build_export_url(file_id: &str, format: &str) -> String {
    let mut export_url = String::from("https://docs.google.com/document/d/");
    export_url.push_str(file_id);
    export_url.push_str("/export?format=");
    export_url.push_str(format);
    export_url
}
#[cfg(test)]
mod tests {
    use super::*;

    const REAL_FILE_ID: &str = "0B_NiLAzvehC9R2stRmQyM3ZiVjQ";
    const REAL_FILE_ID2: &str = "0B9P1L--7Wd2vU3VUVlFnbTgtS2c";
    const REAL_FOLDER_ID: &str = "15uNXeRBIhVvZJIhL4yTw4IsStMhUaaxl";
    const REAL_GOOGLE_DOC_ID: &str = "1DvsG277pWa4WMssXjD9qYYAdF51y7hVidZ6eklfq480";

    /// Parses `url` and checks both the extracted ID and the download-link flag.
    #[track_caller]
    fn assert_parsed(url: &str, expected_id: Option<&str>, expected_download_link: bool) {
        let (id, is_dl) = parse_url(url).unwrap();
        assert_eq!(id.as_deref(), expected_id, "file id for {}", url);
        assert_eq!(
            is_dl, expected_download_link,
            "download-link flag for {}",
            url
        );
    }

    #[test]
    fn test_parse_google_open() {
        let url = format!("https://drive.google.com/open?id={}", REAL_FILE_ID);
        assert_parsed(&url, Some(REAL_FILE_ID), false);
    }

    #[test]
    fn test_parse_uc_download_link() {
        let url = format!("https://drive.google.com/uc?id={}", REAL_FILE_ID);
        assert_parsed(&url, Some(REAL_FILE_ID), true);
    }

    #[test]
    fn test_parse_file_view_link() {
        let url = format!(
            "https://drive.google.com/file/d/{}/view?usp=sharing",
            REAL_FILE_ID2
        );
        assert_parsed(&url, Some(REAL_FILE_ID2), false);
    }

    #[test]
    fn test_parse_subdomain_uc_link() {
        let url = format!(
            "https://drive.google.com/a/jsk.imi.i.u-tokyo.ac.jp/uc?id={}&export=download",
            REAL_FILE_ID
        );
        assert_parsed(&url, Some(REAL_FILE_ID), true);
    }

    #[test]
    fn test_parse_file_edit_link() {
        let url = format!("https://drive.google.com/file/d/{}/edit", REAL_FILE_ID);
        assert_parsed(&url, Some(REAL_FILE_ID), false);
    }

    // Previously identical to `test_parse_google_open`; now also checks that
    // unrelated query parameters do not disturb the `id` lookup.
    #[test]
    fn test_parse_open_link() {
        let url = format!(
            "https://drive.google.com/open?id={}&usp=sharing",
            REAL_FILE_ID
        );
        assert_parsed(&url, Some(REAL_FILE_ID), false);
    }

    #[test]
    fn test_parse_google_doc_edit() {
        let url = format!(
            "https://docs.google.com/document/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_google_doc_view() {
        let url = format!(
            "https://docs.google.com/document/d/{}/view",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_google_doc_htmlview() {
        let url = format!(
            "https://docs.google.com/document/d/{}/htmlview",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_google_sheet_edit() {
        let url = format!(
            "https://docs.google.com/spreadsheets/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_google_slides_edit() {
        let url = format!(
            "https://docs.google.com/presentation/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_folder_link() {
        let url = format!("https://drive.google.com/drive/folders/{}", REAL_FOLDER_ID);
        assert_parsed(&url, Some(REAL_FOLDER_ID), false);
    }

    #[test]
    fn test_parse_folder_link_with_query() {
        let url = format!(
            "https://drive.google.com/drive/folders/{}?usp=sharing",
            REAL_FOLDER_ID
        );
        assert_parsed(&url, Some(REAL_FOLDER_ID), false);
    }

    #[test]
    fn test_parse_file_u_d_link() {
        let url = format!("https://drive.google.com/file/u/0/d/{}/view", REAL_FILE_ID);
        assert_parsed(&url, Some(REAL_FILE_ID), false);
    }

    #[test]
    fn test_parse_document_u_d_link() {
        let url = format!(
            "https://docs.google.com/document/u/0/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_presentation_u_d_link() {
        let url = format!(
            "https://docs.google.com/presentation/u/0/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_spreadsheets_u_d_link() {
        let url = format!(
            "https://docs.google.com/spreadsheets/u/1/d/{}/edit",
            REAL_GOOGLE_DOC_ID
        );
        assert_parsed(&url, Some(REAL_GOOGLE_DOC_ID), false);
    }

    #[test]
    fn test_parse_non_gdrive_url() {
        assert_parsed(
            "https://github.com/wkentaro/gdown/archive/refs/tags/v4.0.0.tar.gz",
            None,
            false,
        );
    }

    #[test]
    fn test_parse_drive_url_without_id() {
        assert_parsed("https://drive.google.com/drive/my-drive", None, false);
    }

    #[test]
    fn test_parse_bare_id() {
        assert_parsed(REAL_FILE_ID, Some(REAL_FILE_ID), false);
    }

    #[test]
    fn test_build_download_url() {
        let url = build_download_url(REAL_FILE_ID);
        assert_eq!(
            url,
            format!(
                "https://drive.google.com/uc?id={}&export=download",
                REAL_FILE_ID
            )
        );
    }

    #[test]
    fn test_build_export_url() {
        let url = build_export_url(REAL_FILE_ID, "pdf");
        assert_eq!(
            url,
            format!(
                "https://docs.google.com/document/d/{}/export?format=pdf",
                REAL_FILE_ID
            )
        );
    }
}