ac_rustube/descrambler/mod.rs
1use std::sync::Arc;
2
3use reqwest::Client;
4use url::Url;
5
6use cipher::Cipher;
7
8use crate::{IdBuf, Stream, Video, VideoDetails, VideoInfo};
9use crate::error::Error;
10use crate::video_info::player_response::streaming_data::RawFormat;
11use crate::video_info::player_response::streaming_data::StreamingData;
12
13mod cipher;
14
/// A descrambler used to decrypt the data fetched by [`VideoFetcher`].
///
/// You will probably rarely use this type directly, and use [`Video`] instead.
/// There's no public way of directly constructing a [`VideoDescrambler`]. The only way of getting
/// one is by calling [`VideoFetcher::fetch`].
///
/// # Example
/// ```no_run
///# use rustube::{VideoFetcher, Id, VideoDescrambler};
///# use url::Url;
/// let url = Url::parse("https://youtube.com/watch?iv=5jlI4uzZGjU").unwrap();
///
///# tokio_test::block_on(async {
/// let fetcher: VideoFetcher = VideoFetcher::from_url(&url).unwrap();
/// let descrambler: VideoDescrambler = fetcher.fetch().await.unwrap();
///# });
/// ```
///
/// # How it works
/// (To fully understand `descramble`, you should first read how [`VideoFetcher`] works).
///
/// Descrambling, in this case, mainly refers to descrambling the [`SignatureCipher`]. After we
/// requested the [`VideoInfo`] in `fetch`, we are left with many [`RawFormat`]s. A [`RawFormat`] is
/// just a bucket full of information about a video. Those formats come in two flavours: pre-signed
/// and encrypted formats. Pre-signed formats are actually a free lunch. Such formats already
/// contain a valid video URL, which can be used to download the video. The encrypted ones are a
/// little bit more complicated.
///
/// These encrypted [`RawFormat`]s contain a so-called [`SignatureCipher`] with the signature
/// field [`s`] in it. This signature is a long string and the YouTube server requires us to
/// include it in the URL query or we get a `403` back. Unfortunately this signature isn't correct
/// yet! We first need to decrypt it. And that's where the `transform_plan` and the
/// `transform_map` come into play.
///
/// The `transform_plan` is just a list of JavaScript function calls, which take a string (or an
/// array) plus sometimes an integer as input. The called JavaScript functions then transform the
/// string in a certain way and return a new string. This new string then represents the new
/// signature. To decrypt the signature we just need to pass it through all of these functions in a
/// row.
///
/// But wait! How can we run JavaScript in Rust? And doesn't that come with a considerable overhead?
/// It actually would come with a vast overhead! That's why we need the `transform_map`. The
/// `transform_map` is a `HashMap<String, TransformFn>`, which maps JavaScript function names to
/// Rust functions.
///
/// To finally decrypt the signature, we just iterate over each function call in the
/// `transform_plan`, extract both the function name and the optional integer argument, and call
/// the corresponding Rust function in `transform_map`.
///
/// The last step `descramble` performs, is to take all [`RawFormat`]s, which now contain the
/// correct signature, and convert them to [`Stream`]s. At the end of the day, `Stream`s are just
/// `RawFormat`s with some extra information.
///
/// And that's it! We can now download a YouTube video like we would download any other
/// video from the internet. The only difference is that the [`Stream`]s [`url`]
/// will eventually expire.
///
/// [`SignatureCipher`]: crate::video_info::player_response::streaming_data::SignatureCipher
/// [`s`]: crate::video_info::player_response::streaming_data::SignatureCipher::s
/// [`url`]: crate::video_info::player_response::streaming_data::SignatureCipher::url
/// [`VideoFetcher`]: crate::fetcher::VideoFetcher
/// [`VideoFetcher::fetch`]: crate::fetcher::VideoFetcher::fetch
// `Display` prints `VideoDescrambler(<video id>)`. `Debug` and `PartialEq` are derived through
// `derivative` so that the `client` field can be left out of both.
#[derive(Clone, derive_more::Display, derivative::Derivative)]
#[display(fmt = "VideoDescrambler({})", "video_info.player_response.video_details.video_id")]
#[derivative(Debug, PartialEq, Eq)]
pub struct VideoDescrambler {
    /// The fetched [`VideoInfo`]; `descramble` mutates its streaming data and then moves it into
    /// the resulting [`Video`].
    pub(crate) video_info: VideoInfo,
    /// The HTTP client that gets cloned into every [`Stream`] created by `descramble`.
    /// Ignored by the `Debug` and `PartialEq` implementations.
    #[derivative(Debug = "ignore", PartialEq = "ignore")]
    pub(crate) client: Client,
    /// The JavaScript source from which `descramble` builds the signature `Cipher`.
    pub(crate) js: String,
}
87
88impl VideoDescrambler {
89 /// Descrambles the data fetched by YouTubeFetcher.
90 /// For more information have a look at the [`Video`] documentation.
91 ///
92 /// ### Errors
93 /// - When the streaming data of the video is incomplete.
94 /// - When descrambling the videos signatures fails.
95 #[log_derive::logfn(ok = "Trace", err = "Error")]
96 #[log_derive::logfn_inputs(Trace)]
97 pub fn descramble(mut self) -> crate::Result<Video> {
98 let streaming_data = self.video_info.player_response.streaming_data
99 .as_mut()
100 .ok_or_else(|| Error::Custom(
101 "VideoInfo contained no StreamingData, which is essential for downloading.".into()
102 ))?;
103
104 if let Some(ref adaptive_fmts_raw) = self.video_info.adaptive_fmts_raw {
105 // fixme: this should probably be part of fetch.
106 apply_descrambler_adaptive_fmts(streaming_data, adaptive_fmts_raw)?;
107 }
108
109 apply_signature(streaming_data, &self.js)?;
110 let mut streams = Vec::new();
111 Self::initialize_streams(
112 streaming_data,
113 &mut streams,
114 &self.client,
115 &self.video_info.player_response.video_details,
116 );
117
118 Ok(Video {
119 video_info: self.video_info,
120 streams,
121 })
122 }
123
124 /// The [`VideoInfo`] of the video.
125 #[inline]
126 pub fn video_info(&self) -> &VideoInfo {
127 &self.video_info
128 }
129
130 /// The [`VideoDetails`] of the video.
131 #[inline]
132 pub fn video_details(&self) -> &VideoDetails {
133 &self.video_info.player_response.video_details
134 }
135
136 /// The [`Id`](crate::Id) of the video.
137 #[inline]
138 pub fn video_id(&self) -> &IdBuf {
139 &self.video_details().video_id
140 }
141
142 /// The title of the video.
143 #[inline]
144 pub fn video_title(&self) -> &String {
145 &self.video_details().title
146 }
147
148 /// Consumes all [`RawFormat`]s and constructs [`Stream`]s from them.
149 #[inline]
150 fn initialize_streams(
151 streaming_data: &mut StreamingData,
152 streams: &mut Vec<Stream>,
153 client: &Client,
154 video_details: &Arc<VideoDetails>,
155 ) {
156 for raw_format in streaming_data.formats.drain(..).chain(streaming_data.adaptive_formats.drain(..)) {
157 let stream = Stream::from_raw_format(
158 raw_format,
159 client.clone(),
160 Arc::clone(video_details),
161 );
162 streams.push(stream);
163 }
164 }
165}
166
167/// Extracts the [`RawFormat`]s from `adaptive_fmts_raw`. (This may be a legacy thing)
168#[inline]
169fn apply_descrambler_adaptive_fmts(streaming_data: &mut StreamingData, adaptive_fmts_raw: &str) -> crate::Result<()> {
170 for raw_fmt in adaptive_fmts_raw.split(',') {
171 // fixme: this implementation is likely wrong.
172 // main question: is adaptive_fmts_raw a list of normal RawFormats?
173 // To make is correct, I would need sample data for adaptive_fmts_raw
174 log::warn!(
175 "`apply_descrambler_adaptive_fmts` is probaply broken!\
176 Please open an issue on GitHub and paste in the whole warning message (it may be quite long).\
177 adaptive_fmts_raw: `{}`", raw_fmt
178 );
179 let raw_format = serde_qs::from_str::<RawFormat>(raw_fmt)?;
180 streaming_data.formats.push(raw_format);
181 }
182
183 Ok(())
184}
185
186/// Descrambles the signature of a video.
187#[inline]
188fn apply_signature(streaming_data: &mut StreamingData, js: &str) -> crate::Result<()> {
189 let cipher = Cipher::from_js(js)?;
190
191 for raw_format in streaming_data.formats.iter_mut().chain(streaming_data.adaptive_formats.iter_mut()) {
192 let url = &mut raw_format.signature_cipher.url;
193 let s = match raw_format.signature_cipher.s {
194 Some(ref mut s) => s,
195 None if url_already_contains_signature(url) => continue,
196 None => return Err(Error::UnexpectedResponse(
197 "RawFormat did not contain a signature (s), nor did the url".into()
198 ))
199 };
200
201 cipher.decrypt_signature(s)?;
202 url
203 .query_pairs_mut()
204 .append_pair("sig", s);
205 }
206
207 Ok(())
208}
209
210/// Checks whether or not the video url is already signed.
211#[inline]
212fn url_already_contains_signature(url: &Url) -> bool {
213 let url = url.as_str();
214 url.contains("signature") || (url.contains("&sig=") || url.contains("&lsig="))
215}