use std::{ffi::OsStr, path::Path};
use self::{
details::{MediaEntity, TweetDetails, VideoInfo},
utils::RequestDetails,
};
use crate::{
header::HeaderMapBuilder,
prelude::{DownloadError, Downloader},
resource::ResourceDownloader,
twitter::{details::MediaType, utils::retrieve_request_details},
};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use regex::Regex;
use reqwest::{header::HeaderValue, Client, Response, Url};
use serde::Deserialize;
mod details;
pub mod utils;
/// Downloads the media (images and/or videos) attached to a single tweet
/// from `twitter.com` / `x.com`.
#[derive(Clone)]
pub struct TwitterDownloader {
// Fully parsed tweet URL (`https://.../<USERNAME>/status/<TWEET_ID>`).
url: Url,
// Numeric `<TWEET_ID>` segment of the URL (see `extract_ids_from_url`).
tweet_id: String,
// NOTE(review): despite the name, this holds the `<USERNAME>` path segment
// captured by `extract_ids_from_url`, not a status id — confirm intent.
status_id: String,
// When `Some`, only media of this kind are downloaded; `None` = all media.
only_media_kind: Option<MediaKind>,
// Produces the filename for the media at a given 0-based index.
names_callback: fn(usize, TwitterMedia) -> String,
// Base name applied to every file (suffixed with " (index)" past the first).
name_all: Option<String>,
// Name used instead of the callback when exactly one file is downloaded.
name_if_only_one_file: Option<String>,
// When `true`, progress/errors are printed to stdout/stderr.
print_download_status: bool,
}
/// Media categories a download can be restricted to.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub enum MediaKind {
// Still images.
Image,
// Videos; animated GIFs are also treated as videos (see `TryFrom<&MediaEntity>`).
Video,
}
// Shape of the `guest/activate.json` response; only the token field is needed.
#[derive(Debug, Deserialize)]
struct GuestTokenResponse {
guest_token: String,
}
impl TwitterDownloader {
/// Creates a downloader for the tweet at `link`.
///
/// # Errors
/// Returns `DownloadError::InvalidUrl` when the link cannot be parsed or its
/// domain is not one of the supported Twitter/X hosts, and
/// `DownloadError::TwitterError` when the username/id segments cannot be
/// extracted from the URL path.
pub fn new(link: &str) -> Result<Self, DownloadError> {
    let url = Self::parse_url(
        link,
        Some("https://www.twitter.com/<USERNAME>/status/<TWEET_ID>"),
    )?;
    if !Self::is_valid_url(&url) {
        // List every domain `is_valid_url` actually accepts (the previous
        // message only mentioned the `www.` variants).
        return Err(DownloadError::InvalidUrl(
            "Invalid URL! The domain must be one of 'twitter.com', 'x.com', 'www.twitter.com' or 'www.x.com'."
                .to_owned(),
        ));
    }
    let (status_id, tweet_id) = Self::extract_ids_from_url(&url)?;
    Ok(Self {
        url,
        status_id,
        tweet_id,
        only_media_kind: None,
        // Default naming: 1-based index, plus the media's extension when known.
        names_callback: |index: usize, media: TwitterMedia| {
            media.extension().map_or_else(
                || (index + 1).to_string(),
                |ext| format!("{}.{}", index + 1, ext.to_string_lossy()),
            )
        },
        name_all: None,
        name_if_only_one_file: None,
        print_download_status: false,
    })
}
/// Replaces the filename-generating callback; returns `self` for chaining.
pub fn set_name_callback(&mut self, callback: fn(usize, TwitterMedia) -> String) -> &mut Self {
self.names_callback = callback;
self
}
/// Uses `value` as the base name for every downloaded file; returns `self`
/// for chaining. Files past the first get an " (index)" suffix.
pub fn name_all(&mut self, value: String) -> &mut Self {
    self.name_all.replace(value);
    self
}
/// Uses `value` as the filename when exactly one file ends up being
/// downloaded; returns `self` for chaining.
pub fn name_if_only_file(&mut self, value: String) -> &mut Self {
    self.name_if_only_one_file.replace(value);
    self
}
/// Fetches the list of media entities attached to this tweet.
///
/// Resolves the bearer and guest tokens, queries the tweet-details endpoint,
/// and extracts the `entities.media` array from the response.
///
/// # Errors
/// Propagates token/network/parsing failures, and returns
/// `DownloadError::TwitterError` when the tweet carries no media at all.
async fn get_tweet_medias(&self) -> Result<Vec<MediaEntity>, DownloadError> {
let (bearer_token, guest_token) = self.get_tokens().await?;
let tweet_details = self.get_tweet_details(&bearer_token, &guest_token).await?;
let opt_medias = tweet_details.data.tweet_result.result.legacy.entities.media;
let medias = opt_medias.ok_or_else(|| {
DownloadError::TwitterError(format!(
"The tweet with ID `{}` does not contain any associated media.",
self.tweet_id()
))
})?;
Ok(medias)
}
/// Extracts `(status_id, tweet_id)` from a status URL, where `status_id` is
/// the `<USERNAME>` path segment and `tweet_id` the numeric `<TWEET_ID>`.
///
/// # Errors
/// Returns `DownloadError::TwitterError` when the URL does not look like
/// `https://[www.](twitter|x).com/<USERNAME>/status/<TWEET_ID>`.
fn extract_ids_from_url(url: &Url) -> Result<(String, String), DownloadError> {
    // Accept an optional `www.` prefix so every domain allowed by
    // `is_valid_url` can also be parsed here (previously `www.` URLs
    // passed validation but failed extraction).
    let pattern = r"https://(?:www\.)?(?:twitter|x)\.com/([^/]+)/status/(\d+)";
    let url_regex = Regex::new(pattern).expect("static pattern is valid");
    if let Some(captures) = url_regex.captures(url.as_str()) {
        if let (Some(status_id), Some(tweet_id)) = (captures.get(1), captures.get(2)) {
            return Ok((status_id.as_str().to_owned(), tweet_id.as_str().to_owned()));
        }
    }
    Err(DownloadError::TwitterError(format!(
        "Failed to parse status_id and tweet_id from the tweet URL: `{}`",
        url
    )))
}
/// Restricts the download to image media only; returns `self` for chaining.
pub fn only_images(&mut self) -> &mut Self {
    self.only_media_kind.replace(MediaKind::Image);
    self
}
/// Restricts the download to video media only; returns `self` for chaining.
pub fn only_videos(&mut self) -> &mut Self {
    self.only_media_kind.replace(MediaKind::Video);
    self
}
/// The `<USERNAME>` segment captured from the tweet URL.
pub fn status_id(&self) -> &str {
    self.status_id.as_str()
}
/// The numeric `<TWEET_ID>` segment captured from the tweet URL.
pub fn tweet_id(&self) -> &str {
    self.tweet_id.as_str()
}
/// The full tweet URL as a string slice.
pub fn url_str(&self) -> &str {
self.url.as_str()
}
/// Fetches the body of `url` as text.
///
/// # Errors
/// Propagates network failures, and returns `DownloadError::TwitterError`
/// on a non-success HTTP status or an unreadable body.
async fn fetch_page_content(url: &str) -> Result<String, DownloadError> {
    let response = reqwest::get(url).await?;
    if response.status().is_success() {
        match response.text().await {
            Ok(text) => Ok(text),
            Err(_) => Err(DownloadError::TwitterError(format!(
                "Failed to read text from URL: {}",
                url
            ))),
        }
    } else {
        Err(DownloadError::TwitterError(format!(
            "Failed to fetch content from URL: {}",
            url
        )))
    }
}
/// Locates the URL of Twitter's `main.<hash>.js` bundle inside the tweet page.
///
/// # Errors
/// Propagates page-fetch failures, and returns `DownloadError::TwitterError`
/// when no `main.js` URL can be found in the page.
async fn get_mainjs_url(&self) -> Result<String, DownloadError> {
    let content = Self::fetch_page_content(self.url_str()).await?;
    // Dots in the host are escaped so only the literal `abs.twimg.com`
    // matches; only the first occurrence is needed, so use `find`.
    let main_js_regex =
        Regex::new(r"https://abs\.twimg\.com/responsive-web/client-web-legacy/main\.[^.]+\.js")
            .expect("static pattern is valid");
    main_js_regex
        .find(&content)
        .map(|mat| mat.as_str().to_owned())
        .ok_or_else(|| {
            DownloadError::TwitterError(format!(
                "Failed to retrieve `main.js` file from `{}` page.",
                self.url
            ))
        })
}
/// Extracts the public bearer token embedded in the `main.js` bundle.
///
/// # Errors
/// Propagates bundle-fetch failures, and returns `DownloadError::TwitterError`
/// when no token-shaped string is found.
async fn get_bearer_token(&self, mainjs_url: &str) -> Result<String, DownloadError> {
    let main_js_content = Self::fetch_page_content(mainjs_url).await?;
    // Bearer tokens start with a long run of 'A's; grab the first match
    // directly instead of collecting every occurrence.
    let bearer_regex = Regex::new(r#"AAAAAAAAA[^\"']+"#).expect("static pattern is valid");
    bearer_regex
        .find(&main_js_content)
        .map(|mat| mat.as_str().to_owned())
        .ok_or_else(|| {
            DownloadError::TwitterError(format!(
                "Failed to find bearer token from `{}` page",
                self.url
            ))
        })
}
/// Activates a guest session and returns its guest token.
///
/// # Errors
/// Propagates network failures, and returns `DownloadError::TwitterError`
/// on a non-success HTTP status or an unparsable activation response.
async fn get_guest_token(&self, bearer_token: &str) -> Result<String, DownloadError> {
    let client = Client::new();
    let headers = HeaderMapBuilder::new()
        .with_user_agent()
        .accept("*/*")
        .accept_language("fr,en-US;q=0.7,en;q=0.3")
        .te("trailers")
        .authorization(
            HeaderValue::from_bytes(format!("Bearer {}", bearer_token).as_bytes())
                .expect("Failed to create HeaderValue"),
        )
        .build();
    let response = client
        .post("https://api.twitter.com/1.1/guest/activate.json")
        .headers(headers)
        .send()
        .await?;
    // A plain status check replaces the previous `.map(|res| ...)??` dance.
    if !response.status().is_success() {
        return Err(DownloadError::TwitterError(format!(
            "Failed to find guest token from `{}` page",
            self.url
        )));
    }
    let body = response.text().await?;
    serde_json::from_str::<GuestTokenResponse>(&body)
        .map(|token_response| token_response.guest_token)
        .map_err(|_| {
            DownloadError::TwitterError(format!(
                "Failed to find guest token from `{}` page",
                self.url
            ))
        })
}
/// Resolves the `(bearer_token, guest_token)` pair needed to call the
/// tweet-details API.
async fn get_tokens(&self) -> Result<(String, String), DownloadError> {
    let main_js = self.get_mainjs_url().await?;
    let bearer = self.get_bearer_token(&main_js).await?;
    let guest = self.get_guest_token(&bearer).await?;
    Ok((bearer, guest))
}
/// Builds the GraphQL `TweetResultByRestId` URL with percent-encoded
/// `variables` and `features` query parameters.
///
/// # Errors
/// Propagates failures from `retrieve_request_details`.
async fn get_details_url(&self) -> Result<String, DownloadError> {
    let RequestDetails {
        mut variables,
        features,
    } = retrieve_request_details().await?;
    variables.set_tweet_id(self.tweet_id().to_owned());
    // These request structs are plain serializable data; a serialization
    // failure would be a programming error, hence `expect`.
    let features_string =
        serde_json::to_string(&features).expect("features should serialize to JSON");
    let variables_string =
        serde_json::to_string(&variables).expect("variables should serialize to JSON");
    let features_encoded = utf8_percent_encode(&features_string, NON_ALPHANUMERIC).to_string();
    let variables_encoded =
        utf8_percent_encode(&variables_string, NON_ALPHANUMERIC).to_string();
    let url = format!("https://twitter.com/i/api/graphql/ncDeACNGIApPMaqGVuF_rw/TweetResultByRestId?variables={}&features={}", variables_encoded, features_encoded);
    Ok(url)
}
/// Issues the authenticated GraphQL request for this tweet's details.
///
/// Sends the same header set used during token retrieval, plus the
/// `x-guest-token` header, and returns the raw HTTP response.
///
/// # Errors
/// Propagates URL-building and network failures.
async fn retrieve_details(
&self,
bearer_token: &str,
guest_token: &str,
) -> Result<Response, DownloadError> {
let url = self.get_details_url().await?;
let client = Client::new();
let headers = HeaderMapBuilder::new()
.with_user_agent()
.accept("*/*")
.accept_language("fr,en-US;q=0.7,en;q=0.3")
.te("trailers")
.authorization(
HeaderValue::from_bytes(format!("Bearer {}", bearer_token).as_bytes())
.expect("Failed to create HeaderValue"),
)
.field(
"x-guest-token",
HeaderValue::from_str(guest_token).expect("Failed to create HeaderValue"),
)
.build();
let details = client.get(url).headers(headers).send().await?;
Ok(details)
}
/// Fetches and deserializes the tweet's details.
///
/// # Errors
/// Returns `DownloadError::TwitterError` on a non-success HTTP status or an
/// unparsable response body.
async fn get_tweet_details(
    &self,
    bearer_token: &str,
    guest_token: &str,
) -> Result<TweetDetails, DownloadError> {
    let response = self.retrieve_details(bearer_token, guest_token).await?;
    if !response.status().is_success() {
        return Err(DownloadError::TwitterError(format!(
            "Failed to get details of tweet with id `{}`",
            self.tweet_id()
        )));
    }
    let body = response.text().await?;
    match serde_json::from_str(&body) {
        Ok(details) => Ok(details),
        Err(_) => Err(DownloadError::TwitterError(
            "Failed to parse tweet details.".to_owned(),
        )),
    }
}
}
#[async_trait::async_trait]
impl Downloader for TwitterDownloader {
/// Accepts only the four Twitter/X hosts (with or without `www.`).
fn is_valid_url(url: &Url) -> bool {
    matches!(
        url.domain(),
        Some("twitter.com" | "x.com" | "www.twitter.com" | "www.x.com")
    )
}
/// Mutable access to the flag controlling status printing during downloads.
fn get_dl_status(&mut self) -> &mut bool {
&mut self.print_download_status
}
/// Downloads every media attached to the tweet into `folder_path`,
/// creating the directory if needed.
///
/// Applies the `only_media_kind` filter, then downloads all files
/// concurrently. Naming precedence: `name_if_only_one_file` (when exactly
/// one file), then `name_all` (suffixed " (index)" past the first file),
/// then `names_callback`.
///
/// # Errors
/// Propagates media-retrieval, conversion, directory-creation, and the
/// first download failure (after all downloads settle).
async fn download_to<P: AsRef<Path> + std::marker::Send>(
    &self,
    folder_path: P,
) -> Result<(), DownloadError> {
    let path = folder_path.as_ref();
    let medias = self.get_tweet_medias().await?;
    // Convert raw entities, failing fast on the first malformed one.
    let media_infos = medias
        .iter()
        .map(|media_entity| {
            media_entity.try_into().map_err(|e| {
                DownloadError::TwitterError(format!(
                    "{} in `{}` tweet details.",
                    e,
                    self.tweet_id()
                ))
            })
        })
        .collect::<Result<Vec<TwitterMedia>, DownloadError>>()?;
    let download_links: Vec<TwitterMedia> = media_infos
        .into_iter()
        .filter(|x| TwitterMedia::filter_media_kind(x, self.only_media_kind.as_ref()))
        .collect();
    if self.print_download_status {
        println!("Downloading...");
    }
    tokio::fs::create_dir_all(path).await?;
    let number_of_files = download_links.len();
    let results = futures::future::join_all(download_links.into_iter().enumerate().map(
        |(index, media)| async move {
            let url = media.url();
            let mut rsrc_downloader = ResourceDownloader::new(url).map_err(|_| {
                DownloadError::TwitterError(format!("Invalid Media File path: `{}`", url))
            })?;
            let filename = if self.name_if_only_one_file.is_some() && number_of_files == 1 {
                self.name_if_only_one_file.as_ref().unwrap().to_owned()
            } else if let Some(name) = self.name_all.as_ref() {
                let mut s = name.to_owned();
                if index != 0 {
                    // `index` formats directly; no intermediate String needed.
                    s.push_str(&format!(" ({})", index));
                }
                s
            } else {
                (self.names_callback)(index, media)
            };
            rsrc_downloader.with_name(filename);
            let download_result = rsrc_downloader.download_to(path).await;
            if self.print_download_status {
                if let Err(err) = &download_result {
                    eprintln!("Error downloading with url `{}`: {:?}", url, err);
                } else {
                    println!("Media downloaded successfully: {}", url);
                }
            }
            download_result
        },
    ))
    .await;
    // Surface the first failure, if any.
    for result in results {
        result?
    }
    Ok(())
}
/// Downloads the tweet's media into the current working directory.
async fn download(&self) -> Result<(), DownloadError> {
self.download_to("./").await
}
/// Synchronous wrapper around [`Self::download`].
fn blocking_download(&self) -> Result<(), DownloadError>
where
Self: Sync,
{
Self::blocking(async { self.download().await })
}
/// Synchronous wrapper around [`Self::download_to`].
fn blocking_download_to<P: AsRef<Path> + std::marker::Send>(
&self,
path: P,
) -> Result<(), DownloadError>
where
Self: Sync,
{
Self::blocking(async { self.download_to(path).await })
}
}
/// A borrowed view over one media entity of a tweet.
#[derive(Debug, Clone, Copy)]
pub enum TwitterMedia<'a> {
// A still image, referenced by its direct HTTPS URL.
Image { url: &'a str },
// A video (or GIF); downloadable variants live inside `VideoInfo`.
Video { infos: &'a VideoInfo },
}
impl<'a> TwitterMedia<'a> {
/// The direct download URL of this media.
///
/// For videos, picks the variant with the highest bitrate (variants
/// without a bitrate are treated as bitrate 0).
///
/// # Panics
/// Panics when a video's variant list is empty.
pub fn url(&self) -> &'a str {
    match self {
        TwitterMedia::Image { url } => *url,
        TwitterMedia::Video { infos, .. } => {
            let best = infos
                .variants
                .iter()
                .max_by_key(|variant| variant.bitrate.unwrap_or(0))
                .expect("video info should contain at least one variant");
            &best.url
        }
    }
}
pub fn extension(&self) -> Option<&OsStr> {
match self {
TwitterMedia::Image { url } => Path::new(url).extension(),
TwitterMedia::Video { infos, .. } => {
let VideoInfo { variants, .. } = infos;
let opt_variant = variants
.iter()
.max_by_key(|variant| variant.bitrate.unwrap_or(0));
let variant = opt_variant.unwrap();
let extension = variant.content_type.split('/').nth(1);
extension.map(|s| OsStr::new(s))
}
}
}
/// Returns `true` when this media matches the requested kind.
///
/// `None` means "no filter": every media is accepted.
fn filter_media_kind(&self, media_kind: Option<&MediaKind>) -> bool {
    // Matching on the kind directly replaces the previous guard chain
    // (`Some(_) if media_kind == Some(..)`) and lets the compiler check
    // exhaustiveness over `MediaKind`.
    match media_kind {
        None => true,
        Some(MediaKind::Image) => matches!(self, TwitterMedia::Image { .. }),
        Some(MediaKind::Video) => matches!(self, TwitterMedia::Video { .. }),
    }
}
}
impl<'a> TryFrom<&'a MediaEntity> for TwitterMedia<'a> {
    type Error = String;

    /// Borrows the relevant parts of a raw `MediaEntity` as a `TwitterMedia`.
    /// GIFs are treated as videos.
    ///
    /// # Errors
    /// Returns an error message when a video/GIF entity carries no
    /// `video_info`.
    fn try_from(media_entity: &'a MediaEntity) -> Result<Self, Self::Error> {
        match media_entity._type {
            MediaType::Image => Ok(TwitterMedia::Image {
                url: &media_entity.media_url_https,
            }),
            MediaType::Video | MediaType::Gif => Ok(TwitterMedia::Video {
                infos: media_entity
                    .video_info
                    .as_ref()
                    // `ok_or_else` so the error String is only allocated on
                    // the failure path.
                    .ok_or_else(|| {
                        "Media with type video but with no video info found".to_owned()
                    })?,
            }),
        }
    }
}