twitter_archive/structs/
personalization.rs

1#!/usr/bin/env rust
2
//! Twitter archives as of 2023-08-31 have private data found under:
4//!
5//!   twitter-<DATE>-<UID>.zip:data/personalization.js
6//!
7//! ## Warnings
8//!
//! - `.[].p13nData.locationHistory` data structure is subject to future changes
10//!
11//! ## Example file reader
12//!
13//! ```no_build
14//! use std::io::Read;
15//! use std::{fs, path};
16//! use zip::read::ZipArchive;
17//!
18//! use twitter_archive::structs::personalization;
19//!
20//! fn main() {
21//!     let input_file = "~/Downloads/twitter-archive.zip";
22//!
23//!     let file_descriptor = fs::File::open(input_file).expect("Unable to read --input-file");
24//!     let mut zip_archive = ZipArchive::new(file_descriptor).unwrap();
25//!     let mut zip_file = zip_archive.by_name("data/personalization.js").unwrap();
26//!     let mut buff = String::new();
27//!     zip_file.read_to_string(&mut buff).unwrap();
28//!
29//!     let json = buff.replacen("window.YTD.personalization.part0 = ", "", 1);
30//!     let data: Vec<personalization::P13nDataObject> = serde_json::from_str(&json).expect("Unable to parse");
31//!
32//!     for (index, object) in data.iter().enumerate() {
33//!         /* Do stuff with each `p13nData` entry */
34//!         println!("Personalization index: {index}");
35//!         println!("Demographics language: {}", object.p13n_data.demographics.languages[0].language);
36//!         println!("Demographics gender: {}", object.p13n_data.demographics.gender_info.gender);
37//!     }
38//! }
39//! ```
40//!
41//! ## Example `twitter-<DATE>-<UID>.zip:data/personalization.js` content
42//!
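//! ```javascript
//! window.YTD.personalization.part0 = [
//!   {
//!     "p13nData" : {
//!       "demographics" : {
//!         "languages" : [
//!           {
//!             "language" : "English",
//!             "isDisabled" : false
//!           }
//!         ],
//!         "genderInfo" : {
//!           "gender" : "unknown",
//!           "genderOverride" : "Borg"
//!         }
//!       },
//!       "interests" : {
//!         "interests" : [
//!           {
//!             "name" : "#HappyFriday",
//!             "isDisabled" : false
//!           }
//!         ],
//!         "partnerInterests" : [],
//!         "audienceAndAdvertisers" : {
//!           "lookalikeAdvertisers" : [
//!             "@EXAMPLE_ONE",
//!             "@EXAMPLE_TWO"
//!           ],
//!           "advertisers" : [],
//!           "doNotReachAdvertisers" : [],
//!           "catalogAudienceAdvertisers" : [],
//!           "numAudiences" : "0"
//!         },
//!         "shows" : [
//!           "1899",
//!           "DuckTales"
//!         ]
//!       },
//!       "locationHistory" : [],
//!       "inferredAgeInfo" : {
//!         "age" : [
//!           "13-99"
//!         ],
//!         "birthDate" : ""
//!       }
//!     }
//!   }
//! ]
//! ```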
52
53use derive_more::Display;
54use serde::{Deserialize, Serialize};
55
56use crate::convert;
57
58/// ## Example
59///
60/// ```
61/// use twitter_archive::structs::personalization::P13nDataObject;
62///
63/// let json = r##"{
64///   "p13nData": {
65///     "demographics": {
66///       "languages": [
67///         {
68///           "language": "English",
69///           "isDisabled": false
70///         }
71///       ],
72///       "genderInfo": {
73///         "gender": "unknown",
74///         "genderOverride": "Borg"
75///       }
76///     },
77///     "interests": {
78///       "interests": [
79///         {
80///           "name": "#HappyFriday",
81///           "isDisabled": false
82///         }
83///       ],
84///       "partnerInterests": [],
85///       "audienceAndAdvertisers": {
86///         "lookalikeAdvertisers": [
87///           "@EXAMPLE_ONE",
88///           "@EXAMPLE_TWO"
89///         ],
90///         "advertisers": [],
91///         "doNotReachAdvertisers": [],
92///         "catalogAudienceAdvertisers": [],
93///         "numAudiences": "0"
94///       },
95///       "shows": [
96///         "1899",
97///         "DuckTales"
98///       ]
99///     },
100///     "locationHistory": [],
101///     "inferredAgeInfo": {
102///       "age": [
103///         "13-99"
104///       ],
105///       "birthDate": ""
106///     }
107///   }
108/// }"##;
109///
110/// let data: P13nDataObject = serde_json::from_str(&json).unwrap();
111///
112/// // De-serialized properties
113/// assert_eq!(data.p13n_data.demographics.languages[0].language, "English");
114/// assert_eq!(data.p13n_data.demographics.languages[0].is_disabled, false);
115/// assert_eq!(data.p13n_data.demographics.gender_info.gender, "unknown");
116/// assert_eq!(data.p13n_data.demographics.gender_info.gender_override, "Borg");
117///
118/// assert_eq!(data.p13n_data.interests.interests[0].name, "#HappyFriday");
119/// assert_eq!(data.p13n_data.interests.interests[0].is_disabled, false);
120/// assert_eq!(data.p13n_data.interests.partner_interests.len(), 0);
121/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.lookalike_advertisers.len(), 2);
122/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.advertisers.len(), 0);
123/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
124/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
125/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.num_audiences, 0);
126/// assert_eq!(data.p13n_data.interests.shows[0], "1899");
127/// assert_eq!(data.p13n_data.interests.shows[1], "DuckTales");
128///
129/// assert_eq!(data.p13n_data.location_history.len(), 0);
130///
131/// assert_eq!(data.p13n_data.inferred_age_info.age[0], "13-99");
132/// assert_eq!(data.p13n_data.inferred_age_info.birth_date, "");
133///
134/// // Re-serialize is equivalent to original data
135/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
136/// ```
137#[derive(Deserialize, Serialize, Debug, Clone, Display)]
138#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
139#[serde(rename_all = "camelCase")]
140pub struct P13nDataObject {
141	/// ## Example JSON data
142	///
143	/// ```json
144	/// {
145	///   "p13nData": {
146	///     "demographics": {
147	///       "languages": [
148	///         {
149	///           "language": "English",
150	///           "isDisabled": false
151	///         }
152	///       ],
153	///       "genderInfo": {
154	///         "gender": "unknown",
155	///         "genderOverride": "Borg"
156	///       }
157	///     },
158	///     "interests": {
159	///       "interests": [
160	///         {
161	///           "name": "#HappyFriday",
162	///           "isDisabled": false
163	///         }
164	///       ],
165	///       "partnerInterests": [],
166	///       "audienceAndAdvertisers": {
167	///         "lookalikeAdvertisers": [
168	///           "@EXAMPLE_ONE",
169	///           "@EXAMPLE_TWO"
170	///         ],
171	///         "advertisers": [],
172	///         "doNotReachAdvertisers": [],
173	///         "catalogAudienceAdvertisers": [],
174	///         "numAudiences": "0"
175	///       },
176	///       "shows": [
177	///         "1899",
178	///         "DuckTales"
179	///       ]
180	///     },
181	///     "locationHistory": [],
182	///     "inferredAgeInfo": {
183	///       "age": [
184	///         "13-99"
185	///       ],
186	///       "birthDate": ""
187	///     }
188	///   }
189	/// }
190	/// ```
191	pub p13n_data: P13nData,
192}
193
194/// ## Example
195///
196/// ```
197/// use twitter_archive::structs::personalization::P13nData;
198///
199/// let json = r##"{
200///   "demographics": {
201///     "languages": [
202///       {
203///         "language": "English",
204///         "isDisabled": false
205///       }
206///     ],
207///     "genderInfo": {
208///       "gender": "unknown",
209///       "genderOverride": "Borg"
210///     }
211///   },
212///   "interests": {
213///     "interests": [
214///       {
215///         "name": "#HappyFriday",
216///         "isDisabled": false
217///       }
218///     ],
219///     "partnerInterests": [],
220///     "audienceAndAdvertisers": {
221///       "lookalikeAdvertisers": [
222///         "@EXAMPLE_ONE",
223///         "@EXAMPLE_TWO"
224///       ],
225///       "advertisers": [],
226///       "doNotReachAdvertisers": [],
227///       "catalogAudienceAdvertisers": [],
228///       "numAudiences": "0"
229///     },
230///     "shows": [
231///       "1899",
232///       "DuckTales"
233///     ]
234///   },
235///   "locationHistory": [],
236///   "inferredAgeInfo": {
237///     "age": [
238///       "13-99"
239///     ],
240///     "birthDate": ""
241///   }
242/// }"##;
243///
244/// let data: P13nData = serde_json::from_str(&json).unwrap();
245///
246/// // De-serialized properties
247/// assert_eq!(data.demographics.languages[0].language, "English");
248/// assert_eq!(data.demographics.languages[0].is_disabled, false);
249/// assert_eq!(data.demographics.gender_info.gender, "unknown");
250/// assert_eq!(data.demographics.gender_info.gender_override, "Borg");
251///
252/// assert_eq!(data.interests.interests[0].name, "#HappyFriday");
253/// assert_eq!(data.interests.interests[0].is_disabled, false);
254/// assert_eq!(data.interests.partner_interests.len(), 0);
255/// assert_eq!(data.interests.audience_and_advertisers.lookalike_advertisers.len(), 2);
256/// assert_eq!(data.interests.audience_and_advertisers.advertisers.len(), 0);
257/// assert_eq!(data.interests.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
258/// assert_eq!(data.interests.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
259/// assert_eq!(data.interests.audience_and_advertisers.num_audiences, 0);
260/// assert_eq!(data.interests.shows[0], "1899");
261/// assert_eq!(data.interests.shows[1], "DuckTales");
262///
263/// assert_eq!(data.location_history.len(), 0);
264///
265/// assert_eq!(data.inferred_age_info.age[0], "13-99");
266/// assert_eq!(data.inferred_age_info.birth_date, "");
267///
268/// // Re-serialize is equivalent to original data
269/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
270/// ```
271#[derive(Deserialize, Serialize, Debug, Clone, Display)]
272#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
273#[serde(rename_all = "camelCase")]
274pub struct P13nData {
275	/// ## Example JSON data
276	///
277	/// ```json
278	/// {
279	///   "demographics": {
280	///     "languages": [
281	///       {
282	///         "language": "English",
283	///         "isDisabled": false
284	///       }
285	///     ],
286	///     "genderInfo": {
287	///       "gender": "unknown",
288	///       "genderOverride": "Borg"
289	///     }
290	///   }
291	/// }
292	/// ```
293	pub demographics: Demographics,
294
295	/// ## Example JSON data
296	///
297	/// ```json
298	/// {
299	///   "interests": {
300	///     "interests": [
301	///       {
302	///         "name": "#HappyFriday",
303	///         "isDisabled": false
304	///       }
305	///     ],
306	///     "partnerInterests": [],
307	///     "audienceAndAdvertisers": {
308	///       "lookalikeAdvertisers": [
309	///         "@EXAMPLE_ONE",
310	///         "@EXAMPLE_TWO"
311	///       ],
312	///       "advertisers": [],
313	///       "doNotReachAdvertisers": [],
314	///       "catalogAudienceAdvertisers": [],
315	///       "numAudiences": "0"
316	///     },
317	///     "shows": [
318	///       "1899",
319	///       "DuckTales"
320	///     ]
321	///   }
322	/// }
323	/// ```
324	pub interests: Interests,
325
326	/// WARNING: this type may be wrong!
327	///
328	/// ## Example JSON data
329	///
330	/// ```json
331	/// { "locationHistory": [] }
332	/// ```
333	pub location_history: Vec<String>,
334
335	/// ## Example JSON data
336	///
337	/// ```json
338	/// {
339	///   "inferredAgeInfo": {
340	///     "age": [
341	///       "13-99"
342	///     ],
343	///     "birthDate": ""
344	///   }
345	/// }
346	/// ```
347	pub inferred_age_info: InferredAgeInfo,
348}
349
350/// ## Example
351///
352/// ```
353/// use twitter_archive::structs::personalization::Demographics;
354///
355/// let json = r#"{
356///   "languages": [
357///     {
358///       "language": "English",
359///       "isDisabled": false
360///     }
361///   ],
362///   "genderInfo": {
363///     "gender": "unknown",
364///     "genderOverride": "Borg"
365///   }
366/// }"#;
367///
368/// let data: Demographics = serde_json::from_str(&json).unwrap();
369///
370/// // De-serialized properties
371/// assert_eq!(data.languages[0].language, "English");
372/// assert_eq!(data.languages[0].is_disabled, false);
373/// assert_eq!(data.gender_info.gender, "unknown");
374/// assert_eq!(data.gender_info.gender_override, "Borg");
375///
376/// // Re-serialize is equivalent to original data
377/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
378/// ```
379#[derive(Deserialize, Serialize, Debug, Clone, Display)]
380#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
381#[serde(rename_all = "camelCase")]
382pub struct Demographics {
383	/// ## Example JSON data
384	///
385	/// ```json
386	/// {
387	///   "languages": [
388	///     {
389	///       "language": "English",
390	///       "isDisabled": false
391	///     }
392	///   ]
393	/// }
394	/// ```
395	pub languages: Vec<LanguageEntry>,
396
397	/// ## Example JSON data
398	///
399	/// ```json
400	/// {
401	///   "genderInfo": {
402	///     "gender": "unknown",
403	///     "genderOverride": "Borg"
404	///   }
405	/// }
406	/// ```
407	pub gender_info: GenderInfo,
408}
409
410/// ## Example
411///
412/// ```
413/// use twitter_archive::structs::personalization::LanguageEntry;
414///
415/// let json = r#"{
416///   "language": "English",
417///   "isDisabled": false
418/// }"#;
419///
420/// let data: LanguageEntry = serde_json::from_str(&json).unwrap();
421///
422/// // De-serialized properties
423/// assert_eq!(data.language, "English");
424/// assert_eq!(data.is_disabled, false);
425///
426/// // Re-serialize is equivalent to original data
427/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
428/// ```
429#[derive(Deserialize, Serialize, Debug, Clone, Display)]
430#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
431#[serde(rename_all = "camelCase")]
432pub struct LanguageEntry {
433	/// ## Example JSON data
434	///
435	/// ```json
436	/// { "language": "English" }
437	/// ```
438	pub language: String,
439
440	/// ## Example JSON data
441	///
442	/// ```json
443	/// { "isDisabled": false }
444	/// ```
445	pub is_disabled: bool,
446}
447
448/// ## Example
449///
450/// ```
451/// use twitter_archive::structs::personalization::GenderInfo;
452///
453/// let json = r#"{
454///   "gender": "unknown",
455///   "genderOverride": "Borg"
456/// }"#;
457///
458/// let data: GenderInfo = serde_json::from_str(&json).unwrap();
459///
460/// // De-serialized properties
461/// assert_eq!(data.gender, "unknown");
462/// assert_eq!(data.gender_override, "Borg");
463///
464/// // Re-serialize is equivalent to original data
465/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
466/// ```
467#[derive(Deserialize, Serialize, Debug, Clone, Display)]
468#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
469#[serde(rename_all = "camelCase")]
470pub struct GenderInfo {
471	/// ## Example JSON data
472	///
473	/// ```json
474	/// { "gender": "unknown" }
475	/// ```
476	pub gender: String,
477
478	/// ## Example JSON data
479	///
480	/// ```json
481	/// { "genderOverride": "Borg" }
482	/// ```
483	pub gender_override: String,
484}
485
486/// ## Example
487///
488/// ```
489/// use twitter_archive::structs::personalization::Interests;
490///
491/// let json = r##"{
492///   "interests": [
493///     {
494///       "name": "#HappyFriday",
495///       "isDisabled": false
496///     }
497///   ],
498///   "partnerInterests": [],
499///   "audienceAndAdvertisers": {
500///     "lookalikeAdvertisers": [
501///       "@EXAMPLE_ONE",
502///       "@EXAMPLE_TWO"
503///     ],
504///     "advertisers": [],
505///     "doNotReachAdvertisers": [],
506///     "catalogAudienceAdvertisers": [],
507///     "numAudiences": "0"
508///   },
509///   "shows": [
510///     "1899",
511///     "DuckTales"
512///   ]
513/// }"##;
514///
515/// let data: Interests = serde_json::from_str(&json).unwrap();
516///
517/// // De-serialized properties
518/// assert_eq!(data.interests[0].name, "#HappyFriday");
519/// assert_eq!(data.interests[0].is_disabled, false);
520/// assert_eq!(data.partner_interests.len(), 0);
521/// assert_eq!(data.audience_and_advertisers.lookalike_advertisers.len(), 2);
522/// assert_eq!(data.audience_and_advertisers.advertisers.len(), 0);
523/// assert_eq!(data.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
524/// assert_eq!(data.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
525/// assert_eq!(data.audience_and_advertisers.num_audiences, 0);
526/// assert_eq!(data.shows[0], "1899");
527/// assert_eq!(data.shows[1], "DuckTales");
528///
529/// // Re-serialize is equivalent to original data
530/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
531/// ```
532#[derive(Deserialize, Serialize, Debug, Clone, Display)]
533#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
534#[serde(rename_all = "camelCase")]
535pub struct Interests {
536	/// ## Example JSON data
537	///
538	/// ```json
539	/// {
540	///   "interests": [
541	///     {
542	///       "name": "#HappyFriday",
543	///       "isDisabled": false
544	///     }
545	///   ]
546	/// }
547	/// ```
548	pub interests: Vec<Interest>,
549
550	/// ## Example JSON data
551	///
552	/// ```json
553	/// { "partnerInterests": [] }
554	/// ```
555	pub partner_interests: Vec<String>,
556
557	/// ## Example JSON data
558	///
559	/// ```json
560	/// {
561	///   "audienceAndAdvertisers": {
562	///     "lookalikeAdvertisers": [
563	///       "@EXAMPLE_ONE",
564	///       "@EXAMPLE_TWO"
565	///     ],
566	///     "advertisers": [],
567	///     "doNotReachAdvertisers": [],
568	///     "catalogAudienceAdvertisers": [],
569	///     "numAudiences": "0"
570	///   }
571	/// }
572	/// ```
573	pub audience_and_advertisers: AudienceAndAdvertisers,
574
575	/// ## Example JSON data
576	///
577	/// ```json
578	/// {
579	///   "shows": [
580	///     "1899",
581	///     "DuckTales"
582	///   ]
583	/// }
584	/// ```
585	pub shows: Vec<String>,
586}
587
588/// ## Example
589///
590/// ```
591/// use twitter_archive::structs::personalization::Interest;
592///
593/// let json = r##"{
594///   "name": "#HappyFriday",
595///   "isDisabled": false
596/// }"##;
597///
598/// let data: Interest = serde_json::from_str(&json).unwrap();
599///
600/// // De-serialized properties
601/// assert_eq!(data.name, "#HappyFriday");
602/// assert_eq!(data.is_disabled, false);
603///
604/// // Re-serialize is equivalent to original data
605/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
606/// ```
607#[derive(Deserialize, Serialize, Debug, Clone, Display)]
608#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
609#[serde(rename_all = "camelCase")]
610pub struct Interest {
611	/// ## Example JSON data
612	///
613	/// ```json
614	/// { "name": "#HappyFriday" }
615	/// ```
616	pub name: String,
617
618	/// ## Example JSON data
619	///
620	/// ```json
621	/// { "isDisabled": false }
622	/// ```
623	pub is_disabled: bool,
624}
625
626/// ## Example
627///
628/// ```
629/// use twitter_archive::structs::personalization::AudienceAndAdvertisers;
630///
631/// let json = r#"{
632///   "lookalikeAdvertisers": [
633///     "@EXAMPLE_ONE",
634///     "@EXAMPLE_TWO"
635///   ],
636///   "advertisers": [],
637///   "doNotReachAdvertisers": [],
638///   "catalogAudienceAdvertisers": [],
639///   "numAudiences": "0"
640/// }"#;
641///
642/// let data: AudienceAndAdvertisers = serde_json::from_str(&json).unwrap();
643///
644/// // De-serialized properties
645/// assert_eq!(data.lookalike_advertisers[0], "@EXAMPLE_ONE");
646/// assert_eq!(data.lookalike_advertisers[1], "@EXAMPLE_TWO");
647/// assert_eq!(data.advertisers.len(), 0);
648/// assert_eq!(data.do_not_reach_advertisers.len(), 0);
649/// assert_eq!(data.catalog_audience_advertisers.len(), 0);
650/// assert_eq!(data.num_audiences, 0);
651///
652/// // Re-serialize is equivalent to original data
653/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
654/// ```
655#[derive(Deserialize, Serialize, Debug, Clone, Display)]
656#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
657#[serde(rename_all = "camelCase")]
658pub struct AudienceAndAdvertisers {
659	/// ## Example JSON data
660	///
661	/// ```json
662	/// {
663	///   "lookalikeAdvertisers": [
664	///     "@EXAMPLE_ONE",
665	///     "@EXAMPLE_TWO"
666	///   ]
667	/// }
668	/// ```
669	pub lookalike_advertisers: Vec<String>,
670
671	/// ## Example JSON data
672	///
673	/// ```json
674	/// { "advertisers": [] }
675	/// ```
676	pub advertisers: Vec<String>,
677
678	/// ## Example JSON data
679	///
680	/// ```json
681	/// { "doNotReachAdvertisers": [] }
682	/// ```
683	pub do_not_reach_advertisers: Vec<String>,
684
685	/// ## Example JSON data
686	///
687	/// ```json
688	/// { "catalogAudienceAdvertisers": [] }
689	/// ```
690	pub catalog_audience_advertisers: Vec<String>,
691
692	/// ## Example JSON data
693	///
694	/// ```json
695	/// { "numAudiences": "0" }
696	/// ```
697	#[serde(with = "convert::number_like_string")]
698	pub num_audiences: usize,
699}
700
701// TODO: find examples from which data structure(s) may be defined
702// /// ## Example
703// ///
704// /// ```
705// /// use twitter_archive::structs::personalization::LocationHistory;
706// ///
707// /// let json = r#"{ }"#;
708// ///
709// /// let data: LocationHistory = serde_json::from_str(&json).unwrap();
710// ///
711// /// // De-serialized properties
712// /// // assert_eq!(data., "");
713// ///
714// /// // Re-serialize is equivalent to original data without pretty printing
715// /// assert_eq!(serde_json::to_string(&data).unwrap(), json.replace("\n", "").replace(" ", ""));
716// /// ```
717// #[derive(Deserialize, Serialize, Debug, Clone, Display)]
718// #[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
719// #[serde(rename_all = "camelCase")]
720// pub struct LocationHistory {
721// 	todo!();
722// }
723
724/// ## Example
725///
726/// ```
727/// use twitter_archive::structs::personalization::InferredAgeInfo;
728///
729/// let json = r#"{
730///   "age": [
731///     "13-99"
732///   ],
733///   "birthDate": ""
734/// }"#;
735///
736/// let data: InferredAgeInfo = serde_json::from_str(&json).unwrap();
737///
738/// // De-serialized properties
739/// assert_eq!(data.age[0], "13-99");
740/// assert_eq!(data.birth_date, "");
741///
742/// // Re-serialize is equivalent to original data
743/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
744/// ```
745#[derive(Deserialize, Serialize, Debug, Clone, Display)]
746#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
747#[serde(rename_all = "camelCase")]
748pub struct InferredAgeInfo {
749	/// ## Example JSON data
750	///
751	/// ```json
752	/// {
753	///   "age": [
754	///     "13-99"
755	///   ]
756	/// }
757	/// ```
758	pub age: Vec<String>,
759
760	/// ## Example JSON data
761	///
762	/// ```json
763	/// { "birthDate": "" }
764	/// ```
765	pub birth_date: String,
766}