1#![doc = include_str!("../README.md")]
2#![warn(missing_docs, clippy::todo, clippy::dbg_macro)]
3
4use std::io::{Cursor, Read, Write};
5use std::ops::DerefMut;
6use std::path::PathBuf;
7use std::time::Instant;
8use std::{fs::File, io::BufReader, ops::Deref, path::Path, sync::OnceLock};
9
10use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
11use deno_core::{v8, JsRuntime, JsRuntimeForSnapshot, RuntimeOptions};
12use reqwest::{header, Client, Url};
13use serde::{Deserialize, Serialize};
14use time::{Duration, OffsetDateTime};
15
16use crate::runtime::TPerm;
17
18mod error;
19mod protobuf;
20mod runtime;
21
22pub use crate::error::Error;
23
/// JavaScript runtime wrapper that records how the runtime was created.
///
/// Every variant dereferences to [`JsRuntime`] through the `Deref` and
/// `DerefMut` implementations below.
enum Rt {
    /// Runtime that was started from previously loaded snapshot data.
    FromSnapshot(JsRuntime),
    /// Runtime created as a [`JsRuntimeForSnapshot`], so a snapshot can be
    /// taken from it later (see `Botguard::write_snapshot`).
    Snapshotting(JsRuntimeForSnapshot),
    /// Plain runtime, created when no snapshot path is configured.
    NoSnapshot(JsRuntime),
}
29
30impl Deref for Rt {
31 type Target = JsRuntime;
32
33 fn deref(&self) -> &Self::Target {
34 match self {
35 Rt::FromSnapshot(rt) | Rt::NoSnapshot(rt) => rt,
36 Rt::Snapshotting(rt) => rt,
37 }
38 }
39}
40
41impl DerefMut for Rt {
42 fn deref_mut(&mut self) -> &mut Self::Target {
43 match self {
44 Rt::FromSnapshot(rt) | Rt::NoSnapshot(rt) => rt,
45 Rt::Snapshotting(rt) => rt,
46 }
47 }
48}
49
/// Builder used to configure and create a [`Botguard`] instance.
#[derive(Default)]
pub struct BotguardBuilder<'a> {
    /// Optional snapshot file path. When set, `init` tries to restore the
    /// runtime from this file and new runtimes are created so that they can be
    /// snapshotted later.
    snapshot_path: Option<&'a Path>,
    /// Optional user agent for the HTTP client; `DEFAULT_UA` is used when unset.
    user_agent: Option<&'a str>,
}
56
/// An initialized Botguard session that can mint PO tokens.
pub struct Botguard {
    /// JavaScript runtime on which the `mint` function is called.
    rt: Rt,
    /// Path that `write_snapshot` writes the snapshot file to, if configured.
    snapshot_path: Option<PathBuf>,
    /// Creation time of the session (copied from the snapshot info when the
    /// runtime was restored from a snapshot).
    created_at: OffsetDateTime,
    /// Lifetime of the session in seconds, counted from `created_at`.
    lifetime: u32,
}
64
/// Metadata stored as a JSON header inside a snapshot file.
#[derive(Debug, Serialize, Deserialize)]
struct SnapshotInfo {
    /// Crate version that wrote the snapshot; compared against `VERSION`.
    rustypipe_botguard: String,
    /// V8 version string the snapshot was created with; compared against
    /// `v8::VERSION_STRING`.
    v8: String,
    /// Creation time of the session, (de)serialized as an RFC 3339 timestamp.
    #[serde(with = "time::serde::rfc3339")]
    created_at: OffsetDateTime,
    /// Lifetime of the session in seconds, counted from `created_at`.
    lifetime: u32,
}
73
/// Contents of a snapshot file after it has been read and validated.
struct SnapshotData {
    /// Raw snapshot bytes handed to the runtime as its startup snapshot.
    data: Box<[u8]>,
    /// Metadata header that accompanied the snapshot bytes.
    info: SnapshotInfo,
}
78
/// Challenge as produced by `parse_challenge_data`; the interpreter may still
/// be a URL that has to be downloaded.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ChallengeData {
    /// Interpreter script, either inline or referenced by URL.
    #[serde(flatten)]
    interpreter_js: InterpreterJs,
    /// Program string (element 4 of the challenge array).
    program: String,
    /// Global name string (element 5 of the challenge array).
    global_name: String,
}
87
/// Challenge whose interpreter script is available as source text; these three
/// values are passed to the JavaScript `runBotguard` function.
#[derive(Debug)]
struct ResolvedChallengeData {
    /// Source code of the interpreter script.
    interpreter_js: String,
    /// Program string taken over from [`ChallengeData`].
    program: String,
    /// Global name string taken over from [`ChallengeData`].
    global_name: String,
}
94
/// The two ways a challenge can provide its interpreter script.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
enum InterpreterJs {
    /// The interpreter has to be downloaded from a URL.
    InterpreterUrl {
        /// URL of the interpreter script.
        #[serde(rename = "privateDoNotAccessOrElseTrustedResourceUrlWrappedValue")]
        url: String,
    },
    /// The interpreter source is included inline.
    InterpreterJavascript {
        /// Source code of the interpreter script.
        #[serde(rename = "privateDoNotAccessOrElseSafeScriptWrappedValue")]
        script: String,
    },
}
107
108impl SnapshotInfo {
109 fn is_valid(&self) -> bool {
110 self.rustypipe_botguard == VERSION
111 && self.v8 == v8::VERSION_STRING
112 && (self.created_at
113 + time::Duration::seconds(i64::from(self.lifetime).saturating_sub(600))
114 > OffsetDateTime::now_utc())
115 }
116}
117
/// Version of this crate, taken from `CARGO_PKG_VERSION` at compile time.
pub const VERSION: &str = env!("CARGO_PKG_VERSION");
/// Magic number written at the start of every snapshot file and checked when
/// a snapshot file is read.
const SNAPSHOT_MAGIC: u32 = 0x18cba459;

/// User agent used by the HTTP client when the builder does not set one; it is
/// also the user agent string passed to the JavaScript `runBotguard` function.
const DEFAULT_UA: &str =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36(KHTML, like Gecko)";
/// API key sent in the `x-goog-api-key` request header.
const GOOG_API_KEY: &str = "AIzaSyDyT5W0Jh49F30Pqqtyfdf7pDLFKLJoAnw";
/// Request key sent as the first element of the JSON request bodies.
const REQUEST_KEY: &str = "O43z0dpjhgX20SCx4KAo";

/// Value of the `Content-Type` request header.
const CONTENT_TYPE: &str = "application/json+protobuf";
/// Value of the `x-user-agent` request header.
const X_USER_AGENT: &str = "grpc-web-javascript/0.1";

/// Process-wide cache of the snapshot loaded from disk; it is set at most once.
static SNAPSHOT_DATA: OnceLock<SnapshotData> = OnceLock::new();
133
134impl<'a> BotguardBuilder<'a> {
135 #[must_use]
139 pub fn new() -> Self {
140 Self::default()
141 }
142
143 #[must_use]
145 pub fn snapshot_path(mut self, snapshot_path: &'a Path) -> Self {
146 self.snapshot_path = Some(snapshot_path);
147 self
148 }
149
150 #[must_use]
152 pub fn snapshot_path_opt(mut self, snapshot_path: Option<&'a Path>) -> Self {
153 self.snapshot_path = snapshot_path;
154 self
155 }
156
157 #[must_use]
162 pub fn user_agent(mut self, user_agent: &'a str) -> Self {
163 self.user_agent = Some(user_agent);
164 self
165 }
166
167 #[must_use]
172 pub fn user_agent_opt(mut self, user_agent: Option<&'a str>) -> Self {
173 self.user_agent = user_agent;
174 self
175 }
176
177 pub async fn init(self) -> Result<Botguard, Error> {
183 if let Some(snapshot_path) = &self.snapshot_path {
184 if SNAPSHOT_DATA.get().is_none() && snapshot_path.is_file() {
185 match read_snapshot_file(snapshot_path) {
186 Ok(Some(snapshot)) => {
187 log::info!("loaded snapshot data ({} bytes)", snapshot.data.len());
188 _ = SNAPSHOT_DATA.set(snapshot);
189 }
190 Ok(None) => {}
191 Err(e) => {
192 log::error!("{e}");
193 }
194 }
195 }
196
197 if let Some(snapshot) = SNAPSHOT_DATA.get() {
198 return Ok(Botguard {
199 rt: Rt::FromSnapshot(JsRuntime::try_new(RuntimeOptions {
200 extensions: rt_extensions(true),
201 startup_snapshot: Some(&snapshot.data),
202 ..Default::default()
203 })?),
204 snapshot_path: self.snapshot_path.map(Path::to_owned),
205 created_at: snapshot.info.created_at,
206 lifetime: snapshot.info.lifetime,
207 });
208 }
209 }
210
211 let client = Client::builder()
212 .user_agent(self.user_agent.unwrap_or(DEFAULT_UA))
213 .gzip(true)
214 .brotli(true)
215 .build()?;
216
217 let mut last_err = Error::InvalidChallenge("cannot init".into());
218 for _ in 0..3 {
219 match self.try_init(&client).await {
220 Ok(bg) => {
221 return Ok(bg);
222 }
223 Err(e) => {
224 log::error!("{e}");
225 last_err = e;
226 }
227 }
228 }
229 Err(last_err)
230 }
231
232 async fn try_init(&self, client: &Client) -> Result<Botguard, Error> {
233 let mut rt = if self.snapshot_path.is_some() {
234 Rt::Snapshotting(JsRuntimeForSnapshot::try_new(RuntimeOptions {
235 extensions: rt_extensions(false),
236 ..Default::default()
237 })?)
238 } else {
239 Rt::NoSnapshot(JsRuntime::try_new(RuntimeOptions {
240 extensions: rt_extensions(false),
241 ..Default::default()
242 })?)
243 };
244
245 rt.load_code().await?;
246
247 let created_at = OffsetDateTime::now_utc();
248 let challenge_data = get_challenge(client).await?;
249 let challenge_data = resolve_challenge_data(client, challenge_data).await?;
250
251 let bg_response = rt
253 .call_js_fn_str(
254 b"runBotguard",
255 &[
256 &challenge_data.interpreter_js,
257 &challenge_data.program,
258 &challenge_data.global_name,
259 DEFAULT_UA,
260 ],
261 )
262 .await?;
263
264 let resp = client
265 .post("https://www.youtube.com/api/jnn/v1/GenerateIT")
266 .header(header::CONTENT_TYPE, CONTENT_TYPE)
267 .header("x-goog-api-key", GOOG_API_KEY)
268 .header("x-user-agent", X_USER_AGENT)
269 .json(&[REQUEST_KEY, &bg_response])
270 .send()
271 .await?
272 .error_for_status()?
273 .json::<serde_json::Value>()
274 .await?;
275 let resp_array = resp
276 .as_array()
277 .ok_or(Error::InvalidResponse("array expected".into()))?;
278 let integrity_token = resp_array[0].as_str().ok_or(Error::InvalidResponse(
279 "could not get integrity token".into(),
280 ))?;
281 let lifetime = resp_array[1]
282 .as_u64()
283 .ok_or(Error::InvalidResponse("could not get lifetime".into()))?;
284
285 rt.call_js_fn(b"newMinter", &[integrity_token]).await?;
286
287 {
289 let vdata = "Cgs4bFZSaUotYTYtQSiJnvu8BjIKCgJERRIEEgAgFw==";
290 let po_token = rt.call_js_fn_str(b"mint", &[vdata]).await?;
291 validate_potoken(&po_token, vdata)
292 .map_err(|e| Error::InvalidPoToken(format!("check failed: {e}").into()))?;
293 }
294
295 Ok(Botguard {
296 rt,
297 snapshot_path: self.snapshot_path.map(Path::to_owned),
298 created_at,
299 lifetime: lifetime as u32,
300 })
301 }
302}
303
304impl Botguard {
305 #[must_use]
309 pub fn builder<'a>() -> BotguardBuilder<'a> {
310 BotguardBuilder::new()
311 }
312
313 pub fn is_from_snapshot(&self) -> bool {
315 matches!(self.rt, Rt::FromSnapshot(_))
316 }
317
318 pub fn created_at(&self) -> OffsetDateTime {
320 self.created_at
321 }
322
323 pub fn lifetime(&self) -> u32 {
325 self.lifetime
326 }
327
328 pub fn valid_until(&self) -> OffsetDateTime {
330 self.created_at + Duration::seconds(self.lifetime.into())
331 }
332
333 pub async fn mint_token(&mut self, ident: &str) -> Result<String, Error> {
340 let ident_urldec = urlencoding::decode(ident).unwrap_or(ident.into());
341 let po_token = self.rt.call_js_fn_str(b"mint", &[&ident_urldec]).await?;
342 validate_potoken(&po_token, &ident_urldec)?;
343 Ok(po_token)
344 }
345
346 pub async fn write_snapshot(self) -> bool {
355 if let Rt::Snapshotting(rt) = self.rt {
356 let mark = Instant::now();
357 let snapshot = rt.snapshot();
358 log::info!(
359 "Snapshot size: {}, took {:#?}",
360 snapshot.len(),
361 mark.elapsed(),
362 );
363 let info = SnapshotInfo {
364 rustypipe_botguard: VERSION.to_owned(),
365 v8: v8::VERSION_STRING.to_owned(),
366 created_at: self.created_at,
367 lifetime: self.lifetime,
368 };
369 match write_snapshot_file(self.snapshot_path.as_deref().unwrap(), &info, &snapshot) {
370 Ok(_) => {
371 log::debug!("snapshot written to {:?}", self.snapshot_path);
372 true
373 }
374 Err(e) => {
375 log::error!("could not write snapshot: {e}");
376 false
377 }
378 }
379 } else {
380 false
381 }
382 }
383}
384
385impl Rt {
386 async fn call_js_fn(
387 &mut self,
388 function: &'static [u8],
389 args: &[&str],
390 ) -> Result<v8::Global<v8::Value>, Error> {
391 let js_fn: v8::Global<v8::Function> = {
392 let context = self.main_context();
393 let scope = &mut self.handle_scope();
394 let context_local = v8::Local::new(scope, context);
395 let global_obj = context_local.global(scope);
396 let name_str = v8::String::new_external_onebyte_static(scope, function).unwrap();
397 let func = global_obj
398 .get(scope, name_str.into())
399 .and_then(|x| x.try_cast().ok())
400 .ok_or_else(|| {
401 Error::Js(
402 format!("function {} not found", String::from_utf8_lossy(function)).into(),
403 )
404 })?;
405 v8::Global::new(scope, func)
406 };
407
408 let arg_values = {
409 let scope = &mut self.handle_scope();
410 args.iter()
411 .map(|arg| {
412 let s = v8::String::new(scope, arg)
413 .ok_or(Error::Js("could not create arg".into()))?;
414 Ok(v8::Global::new(scope, s.cast()))
415 })
416 .collect::<Result<Vec<v8::Global<v8::Value>>, Error>>()
417 }?;
418 let result_fut = self.call_with_args(&js_fn, &arg_values);
419 let res = self
420 .with_event_loop_promise(result_fut, Default::default())
421 .await?;
422 Ok(res)
423 }
424
425 async fn call_js_fn_str(
426 &mut self,
427 function: &'static [u8],
428 args: &[&str],
429 ) -> Result<String, Error> {
430 let res = self.call_js_fn(function, args).await?;
431 let scope = &mut self.handle_scope();
432 Ok(res.open(scope).to_rust_string_lossy(scope))
433 }
434
435 async fn load_code(&mut self) -> Result<(), Error> {
436 let code = bg_bundle();
437 let mid = self
438 .load_main_es_module_from_code(&Url::parse("file:///bg_bundle.min.js").unwrap(), code)
439 .await
440 .unwrap();
441 let mut receiver = self.mod_evaluate(mid);
442 tokio::select! {
443 biased;
446
447 maybe_result = &mut receiver => {
448 log::debug!("received module evaluate {:#?}", maybe_result);
449 maybe_result
450 }
451
452 event_loop_result = self.run_event_loop(Default::default()) => {
453 event_loop_result.unwrap();
454 receiver.await
455 }
456 }?;
457 Ok(())
458 }
459}
460
461fn bg_bundle() -> String {
462 let bg_bundle: &[u8] = include_bytes!("../js/bg_bundle.min.js.br");
463 let mut res = Vec::new();
464 brotli::BrotliDecompress(&mut Cursor::new(bg_bundle), &mut res).unwrap();
465 unsafe { String::from_utf8_unchecked(res) }
466}
467
468fn rt_extensions(from_snapshot: bool) -> Vec<deno_core::Extension> {
469 if from_snapshot {
470 vec![
471 deno_webidl::deno_webidl::init_ops(),
472 deno_console::deno_console::init_ops(),
473 deno_url::deno_url::init_ops(),
474 deno_web::deno_web::init_ops::<TPerm>(Default::default(), None),
475 crate::runtime::runtime::init_ops(),
476 ]
477 } else {
478 vec![
479 deno_webidl::deno_webidl::init_ops_and_esm(),
480 deno_console::deno_console::init_ops_and_esm(),
481 deno_url::deno_url::init_ops_and_esm(),
482 deno_web::deno_web::init_ops_and_esm::<TPerm>(Default::default(), None),
483 crate::runtime::runtime::init_ops_and_esm(),
484 ]
485 }
486}
487
488fn read_snapshot_file(path: &Path) -> Result<Option<SnapshotData>, Error> {
499 let mut reader = BufReader::new(File::open(path)?);
500 let magic = reader.read_u32::<BigEndian>()?;
501 if magic != SNAPSHOT_MAGIC {
502 return Err(Error::InvalidSnapshot("incorrect magic number".into()));
503 }
504
505 let info_len = reader.read_u32::<BigEndian>()?;
506 let mut info_bytes = vec![0; info_len as usize];
507 reader.read_exact(&mut info_bytes)?;
508 let info = serde_json::from_slice::<SnapshotInfo>(&info_bytes)
509 .map_err(|e| Error::InvalidSnapshot(e.to_string().into()))?;
510 if !info.is_valid() {
511 return Ok(None);
512 }
513
514 let data_len = reader.read_u32::<BigEndian>()? as usize;
515 let mut data = Vec::with_capacity(data_len);
516 reader.read_to_end(&mut data)?;
517 if data.len() != data_len {
518 return Err(Error::InvalidSnapshot("incomplete data".into()));
519 }
520
521 Ok(Some(SnapshotData {
522 data: data.into_boxed_slice(),
523 info,
524 }))
525}
526
527fn write_snapshot_file(path: &Path, info: &SnapshotInfo, data: &[u8]) -> Result<(), Error> {
528 let info =
529 serde_json::to_string(info).map_err(|e| Error::InvalidSnapshot(e.to_string().into()))?;
530
531 let mut writer = File::create(path)?;
532 writer.write_u32::<BigEndian>(SNAPSHOT_MAGIC)?;
533 writer.write_u32::<BigEndian>(
534 info.len()
535 .try_into()
536 .map_err(|_| Error::InvalidSnapshot("info header too long".into()))?,
537 )?;
538 writer.write_all(info.as_bytes())?;
539 writer.write_u32::<BigEndian>(
540 data.len()
541 .try_into()
542 .map_err(|_| Error::InvalidSnapshot("snapshot too long".into()))?,
543 )?;
544 writer.write_all(data)?;
545 Ok(())
546}
547
548async fn get_challenge(client: &Client) -> Result<ChallengeData, Error> {
549 let resp = client
550 .post("https://www.youtube.com/api/jnn/v1/Create")
551 .header(header::CONTENT_TYPE, CONTENT_TYPE)
552 .header("x-goog-api-key", GOOG_API_KEY)
553 .header("x-user-agent", X_USER_AGENT)
554 .json(&[REQUEST_KEY])
555 .send()
556 .await?
557 .error_for_status()?
558 .json::<serde_json::Value>()
559 .await?;
560 let resp_arr = resp
561 .as_array()
562 .ok_or(Error::InvalidChallenge("array expected".into()))?;
563 if let Some(scrambled) = resp_arr.get(1).and_then(|x| x.as_str()) {
564 let descrambled = descramble(scrambled)
565 .map_err(|e| Error::InvalidChallenge(format!("descramble: {e}").into()))?;
566 let cdata = serde_json::from_slice::<Vec<serde_json::Value>>(&descrambled)
567 .map_err(|e| Error::InvalidChallenge(e.to_string().into()))?;
568 parse_challenge_data(&cdata)
569 } else if let Some(obj) = resp_arr.first().and_then(|x| x.as_array()) {
570 parse_challenge_data(obj)
571 } else {
572 Err(Error::InvalidChallenge("invalid format".into()))
573 }
574}
575
576async fn resolve_challenge_data(
577 client: &Client,
578 challenge_data: ChallengeData,
579) -> Result<ResolvedChallengeData, Error> {
580 let interpreter_js = match challenge_data.interpreter_js {
581 InterpreterJs::InterpreterUrl { url } => {
582 let url = Url::parse(&format!("https:{url}"))
583 .or_else(|_| Url::parse(&url))
584 .map_err(|e| Error::InvalidChallenge(format!("{e}: {url}").into()))?;
585 let domain = url
586 .domain()
587 .ok_or(Error::InvalidChallenge("no domain".into()))?;
588 let domain = domain.strip_prefix("www.").unwrap_or(domain);
589 if !matches!(domain, "google.com" | "youtube.com") {
590 return Err(Error::InvalidChallenge(
591 format!("invalid domain: {domain}").into(),
592 ));
593 }
594
595 client
596 .get(url)
597 .send()
598 .await?
599 .error_for_status()?
600 .text()
601 .await?
602 }
603 InterpreterJs::InterpreterJavascript { script } => script,
604 };
605
606 Ok(ResolvedChallengeData {
607 interpreter_js,
608 program: challenge_data.program,
609 global_name: challenge_data.global_name,
610 })
611}
612
613fn parse_challenge_data(cdata: &[serde_json::Value]) -> Result<ChallengeData, Error> {
614 if cdata.len() < 6 {
615 return Err(Error::InvalidChallenge("array len < 6".into()));
616 }
617
618 let interpreter_js = cdata[1]
619 .as_array()
620 .and_then(|a| {
621 a.iter()
622 .find_map(|itm| itm.as_str().filter(|s| !s.is_empty()))
623 })
624 .map(|s| InterpreterJs::InterpreterJavascript {
625 script: s.to_owned(),
626 })
627 .or_else(|| {
628 cdata[2]
629 .as_array()
630 .and_then(|a| {
631 a.iter()
632 .find_map(|itm| itm.as_str().filter(|s| !s.is_empty()))
633 })
634 .map(|url| InterpreterJs::InterpreterUrl {
635 url: url.to_owned(),
636 })
637 })
638 .ok_or(Error::InvalidChallenge("interpreterJs".into()))?;
639
640 let program = cdata[4]
641 .as_str()
642 .ok_or(Error::InvalidChallenge("program".into()))?;
643 let global_name = cdata[5]
644 .as_str()
645 .ok_or(Error::InvalidChallenge("globalName".into()))?;
646
647 Ok(ChallengeData {
648 interpreter_js,
649 program: program.to_owned(),
650 global_name: global_name.to_owned(),
651 })
652}
653
654fn descramble(scrambled_challenge: &str) -> Result<Vec<u8>, data_encoding::DecodeError> {
655 let bts = data_encoding::BASE64.decode(scrambled_challenge.as_bytes())?;
656 Ok(bts.into_iter().map(|x| x.wrapping_add(97)).collect())
657}
658
659fn validate_potoken(po_token: &str, ident: &str) -> Result<(), Error> {
660 let token_bts = data_encoding::BASE64URL
661 .decode(po_token.as_bytes())
662 .map_err(|e| Error::InvalidPoToken(format!("invalid b64: {e}").into()))?;
663
664 let data_bts = protobuf::bytes_from_pb(token_bts, 6).ok_or_else(|| {
665 Error::InvalidPoToken(format!("could not parse protobuf: {po_token}").into())
666 })?;
667
668 if data_bts.len() != ident.len() + 72 {
669 return Err(Error::InvalidPoToken(
670 format!("invalid length: {po_token}").into(),
671 ));
672 }
673 Ok(())
674}
675
#[cfg(test)]
mod tests {
    use crate::Botguard;

    use temp_testdir::TempDir;

    /// Mints a token for a fixed, URL-encoded identifier and panics if minting fails.
    async fn _mint_token(bg: &mut Botguard) {
        bg.mint_token("CgswRkprS3VKM1dlNCjX6Iy9BjIKCgJERRIEEgAgOw%3D%3D")
            .await
            .unwrap();
    }

    /// End-to-end test: creates a new instance (this uses the network), writes
    /// a snapshot, and then checks that a second instance is restored from
    /// that snapshot with the same creation and expiry times.
    #[tokio::test]
    async fn test_botguard() {
        let td = TempDir::default();
        let mut snapshot_path = td.to_path_buf();
        snapshot_path.push("bg_snapshot.bin");

        // First run: no snapshot file exists yet, so a new session is created.
        let mut bg = Botguard::builder()
            .snapshot_path(&snapshot_path)
            .init()
            .await
            .unwrap();
        _mint_token(&mut bg).await;
        let cdate = bg.created_at();
        let valid_until = bg.valid_until();
        assert!(!bg.is_from_snapshot());
        assert!(bg.write_snapshot().await);

        // Second run: the instance has to be restored from the written snapshot.
        let mut bg = Botguard::builder()
            .snapshot_path(&snapshot_path)
            .init()
            .await
            .unwrap();
        assert!(bg.is_from_snapshot());
        assert_eq!(bg.created_at(), cdate);
        assert_eq!(bg.valid_until(), valid_until);
        _mint_token(&mut bg).await;
    }
}