twitter_archive/structs/personalization.rs
1#!/usr/bin/env rust
2
//! Twitter archives as of 2023-08-31 have private data found under:
4//!
5//! twitter-<DATE>-<UID>.zip:data/personalization.js
6//!
7//! ## Warnings
8//!
//! - `.[].p13nData.locationHistory` data structure is subject to future changes; its element
//!   type is unconfirmed (see the warning on `P13nData::location_history`)
10//!
11//! ## Example file reader
12//!
//! ```ignore
14//! use std::io::Read;
15//! use std::{fs, path};
16//! use zip::read::ZipArchive;
17//!
18//! use twitter_archive::structs::personalization;
19//!
20//! fn main() {
21//! let input_file = "~/Downloads/twitter-archive.zip";
22//!
23//! let file_descriptor = fs::File::open(input_file).expect("Unable to read --input-file");
24//! let mut zip_archive = ZipArchive::new(file_descriptor).unwrap();
25//! let mut zip_file = zip_archive.by_name("data/personalization.js").unwrap();
26//! let mut buff = String::new();
27//! zip_file.read_to_string(&mut buff).unwrap();
28//!
29//! let json = buff.replacen("window.YTD.personalization.part0 = ", "", 1);
30//! let data: Vec<personalization::P13nDataObject> = serde_json::from_str(&json).expect("Unable to parse");
31//!
32//! for (index, object) in data.iter().enumerate() {
33//! /* Do stuff with each `p13nData` entry */
34//! println!("Personalization index: {index}");
35//! println!("Demographics language: {}", object.p13n_data.demographics.languages[0].language);
36//! println!("Demographics gender: {}", object.p13n_data.demographics.gender_info.gender);
37//! }
38//! }
39//! ```
40//!
41//! ## Example `twitter-<DATE>-<UID>.zip:data/personalization.js` content
42//!
43//! ```javascript
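//! window.YTD.personalization.part0 = [
//!   {
//!     "p13nData" : {
//!       "demographics" : {
//!         "languages" : [
//!           {
//!             "language" : "English",
//!             "isDisabled" : false
//!           }
//!         ],
//!         "genderInfo" : {
//!           "gender" : "unknown",
//!           "genderOverride" : "Borg"
//!         }
//!       },
//!       "interests" : {
//!         "interests" : [
//!           {
//!             "name" : "#HappyFriday",
//!             "isDisabled" : false
//!           }
//!         ],
//!         "partnerInterests" : [ ],
//!         "audienceAndAdvertisers" : {
//!           "lookalikeAdvertisers" : [
//!             "@EXAMPLE_ONE",
//!             "@EXAMPLE_TWO"
//!           ],
//!           "advertisers" : [ ],
//!           "doNotReachAdvertisers" : [ ],
//!           "catalogAudienceAdvertisers" : [ ],
//!           "numAudiences" : "0"
//!         },
//!         "shows" : [
//!           "1899",
//!           "DuckTales"
//!         ]
//!       },
//!       "locationHistory" : [ ],
//!       "inferredAgeInfo" : {
//!         "age" : [
//!           "13-99"
//!         ],
//!         "birthDate" : ""
//!       }
//!     }
//!   }
//! ]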
51//! ```
52
53use derive_more::Display;
54use serde::{Deserialize, Serialize};
55
56use crate::convert;
57
58/// ## Example
59///
60/// ```
61/// use twitter_archive::structs::personalization::P13nDataObject;
62///
63/// let json = r##"{
64/// "p13nData": {
65/// "demographics": {
66/// "languages": [
67/// {
68/// "language": "English",
69/// "isDisabled": false
70/// }
71/// ],
72/// "genderInfo": {
73/// "gender": "unknown",
74/// "genderOverride": "Borg"
75/// }
76/// },
77/// "interests": {
78/// "interests": [
79/// {
80/// "name": "#HappyFriday",
81/// "isDisabled": false
82/// }
83/// ],
84/// "partnerInterests": [],
85/// "audienceAndAdvertisers": {
86/// "lookalikeAdvertisers": [
87/// "@EXAMPLE_ONE",
88/// "@EXAMPLE_TWO"
89/// ],
90/// "advertisers": [],
91/// "doNotReachAdvertisers": [],
92/// "catalogAudienceAdvertisers": [],
93/// "numAudiences": "0"
94/// },
95/// "shows": [
96/// "1899",
97/// "DuckTales"
98/// ]
99/// },
100/// "locationHistory": [],
101/// "inferredAgeInfo": {
102/// "age": [
103/// "13-99"
104/// ],
105/// "birthDate": ""
106/// }
107/// }
108/// }"##;
109///
110/// let data: P13nDataObject = serde_json::from_str(&json).unwrap();
111///
112/// // De-serialized properties
113/// assert_eq!(data.p13n_data.demographics.languages[0].language, "English");
114/// assert_eq!(data.p13n_data.demographics.languages[0].is_disabled, false);
115/// assert_eq!(data.p13n_data.demographics.gender_info.gender, "unknown");
116/// assert_eq!(data.p13n_data.demographics.gender_info.gender_override, "Borg");
117///
118/// assert_eq!(data.p13n_data.interests.interests[0].name, "#HappyFriday");
119/// assert_eq!(data.p13n_data.interests.interests[0].is_disabled, false);
120/// assert_eq!(data.p13n_data.interests.partner_interests.len(), 0);
121/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.lookalike_advertisers.len(), 2);
122/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.advertisers.len(), 0);
123/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
124/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
125/// assert_eq!(data.p13n_data.interests.audience_and_advertisers.num_audiences, 0);
126/// assert_eq!(data.p13n_data.interests.shows[0], "1899");
127/// assert_eq!(data.p13n_data.interests.shows[1], "DuckTales");
128///
129/// assert_eq!(data.p13n_data.location_history.len(), 0);
130///
131/// assert_eq!(data.p13n_data.inferred_age_info.age[0], "13-99");
132/// assert_eq!(data.p13n_data.inferred_age_info.birth_date, "");
133///
134/// // Re-serialize is equivalent to original data
135/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
136/// ```
137#[derive(Deserialize, Serialize, Debug, Clone, Display)]
138#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
139#[serde(rename_all = "camelCase")]
140pub struct P13nDataObject {
141 /// ## Example JSON data
142 ///
143 /// ```json
144 /// {
145 /// "p13nData": {
146 /// "demographics": {
147 /// "languages": [
148 /// {
149 /// "language": "English",
150 /// "isDisabled": false
151 /// }
152 /// ],
153 /// "genderInfo": {
154 /// "gender": "unknown",
155 /// "genderOverride": "Borg"
156 /// }
157 /// },
158 /// "interests": {
159 /// "interests": [
160 /// {
161 /// "name": "#HappyFriday",
162 /// "isDisabled": false
163 /// }
164 /// ],
165 /// "partnerInterests": [],
166 /// "audienceAndAdvertisers": {
167 /// "lookalikeAdvertisers": [
168 /// "@EXAMPLE_ONE",
169 /// "@EXAMPLE_TWO"
170 /// ],
171 /// "advertisers": [],
172 /// "doNotReachAdvertisers": [],
173 /// "catalogAudienceAdvertisers": [],
174 /// "numAudiences": "0"
175 /// },
176 /// "shows": [
177 /// "1899",
178 /// "DuckTales"
179 /// ]
180 /// },
181 /// "locationHistory": [],
182 /// "inferredAgeInfo": {
183 /// "age": [
184 /// "13-99"
185 /// ],
186 /// "birthDate": ""
187 /// }
188 /// }
189 /// }
190 /// ```
191 pub p13n_data: P13nData,
192}
193
194/// ## Example
195///
196/// ```
197/// use twitter_archive::structs::personalization::P13nData;
198///
199/// let json = r##"{
200/// "demographics": {
201/// "languages": [
202/// {
203/// "language": "English",
204/// "isDisabled": false
205/// }
206/// ],
207/// "genderInfo": {
208/// "gender": "unknown",
209/// "genderOverride": "Borg"
210/// }
211/// },
212/// "interests": {
213/// "interests": [
214/// {
215/// "name": "#HappyFriday",
216/// "isDisabled": false
217/// }
218/// ],
219/// "partnerInterests": [],
220/// "audienceAndAdvertisers": {
221/// "lookalikeAdvertisers": [
222/// "@EXAMPLE_ONE",
223/// "@EXAMPLE_TWO"
224/// ],
225/// "advertisers": [],
226/// "doNotReachAdvertisers": [],
227/// "catalogAudienceAdvertisers": [],
228/// "numAudiences": "0"
229/// },
230/// "shows": [
231/// "1899",
232/// "DuckTales"
233/// ]
234/// },
235/// "locationHistory": [],
236/// "inferredAgeInfo": {
237/// "age": [
238/// "13-99"
239/// ],
240/// "birthDate": ""
241/// }
242/// }"##;
243///
244/// let data: P13nData = serde_json::from_str(&json).unwrap();
245///
246/// // De-serialized properties
247/// assert_eq!(data.demographics.languages[0].language, "English");
248/// assert_eq!(data.demographics.languages[0].is_disabled, false);
249/// assert_eq!(data.demographics.gender_info.gender, "unknown");
250/// assert_eq!(data.demographics.gender_info.gender_override, "Borg");
251///
252/// assert_eq!(data.interests.interests[0].name, "#HappyFriday");
253/// assert_eq!(data.interests.interests[0].is_disabled, false);
254/// assert_eq!(data.interests.partner_interests.len(), 0);
255/// assert_eq!(data.interests.audience_and_advertisers.lookalike_advertisers.len(), 2);
256/// assert_eq!(data.interests.audience_and_advertisers.advertisers.len(), 0);
257/// assert_eq!(data.interests.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
258/// assert_eq!(data.interests.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
259/// assert_eq!(data.interests.audience_and_advertisers.num_audiences, 0);
260/// assert_eq!(data.interests.shows[0], "1899");
261/// assert_eq!(data.interests.shows[1], "DuckTales");
262///
263/// assert_eq!(data.location_history.len(), 0);
264///
265/// assert_eq!(data.inferred_age_info.age[0], "13-99");
266/// assert_eq!(data.inferred_age_info.birth_date, "");
267///
268/// // Re-serialize is equivalent to original data
269/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
270/// ```
271#[derive(Deserialize, Serialize, Debug, Clone, Display)]
272#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
273#[serde(rename_all = "camelCase")]
274pub struct P13nData {
275 /// ## Example JSON data
276 ///
277 /// ```json
278 /// {
279 /// "demographics": {
280 /// "languages": [
281 /// {
282 /// "language": "English",
283 /// "isDisabled": false
284 /// }
285 /// ],
286 /// "genderInfo": {
287 /// "gender": "unknown",
288 /// "genderOverride": "Borg"
289 /// }
290 /// }
291 /// }
292 /// ```
293 pub demographics: Demographics,
294
295 /// ## Example JSON data
296 ///
297 /// ```json
298 /// {
299 /// "interests": {
300 /// "interests": [
301 /// {
302 /// "name": "#HappyFriday",
303 /// "isDisabled": false
304 /// }
305 /// ],
306 /// "partnerInterests": [],
307 /// "audienceAndAdvertisers": {
308 /// "lookalikeAdvertisers": [
309 /// "@EXAMPLE_ONE",
310 /// "@EXAMPLE_TWO"
311 /// ],
312 /// "advertisers": [],
313 /// "doNotReachAdvertisers": [],
314 /// "catalogAudienceAdvertisers": [],
315 /// "numAudiences": "0"
316 /// },
317 /// "shows": [
318 /// "1899",
319 /// "DuckTales"
320 /// ]
321 /// }
322 /// }
323 /// ```
324 pub interests: Interests,
325
326 /// WARNING: this type may be wrong!
327 ///
328 /// ## Example JSON data
329 ///
330 /// ```json
331 /// { "locationHistory": [] }
332 /// ```
333 pub location_history: Vec<String>,
334
335 /// ## Example JSON data
336 ///
337 /// ```json
338 /// {
339 /// "inferredAgeInfo": {
340 /// "age": [
341 /// "13-99"
342 /// ],
343 /// "birthDate": ""
344 /// }
345 /// }
346 /// ```
347 pub inferred_age_info: InferredAgeInfo,
348}
349
350/// ## Example
351///
352/// ```
353/// use twitter_archive::structs::personalization::Demographics;
354///
355/// let json = r#"{
356/// "languages": [
357/// {
358/// "language": "English",
359/// "isDisabled": false
360/// }
361/// ],
362/// "genderInfo": {
363/// "gender": "unknown",
364/// "genderOverride": "Borg"
365/// }
366/// }"#;
367///
368/// let data: Demographics = serde_json::from_str(&json).unwrap();
369///
370/// // De-serialized properties
371/// assert_eq!(data.languages[0].language, "English");
372/// assert_eq!(data.languages[0].is_disabled, false);
373/// assert_eq!(data.gender_info.gender, "unknown");
374/// assert_eq!(data.gender_info.gender_override, "Borg");
375///
376/// // Re-serialize is equivalent to original data
377/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
378/// ```
379#[derive(Deserialize, Serialize, Debug, Clone, Display)]
380#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
381#[serde(rename_all = "camelCase")]
382pub struct Demographics {
383 /// ## Example JSON data
384 ///
385 /// ```json
386 /// {
387 /// "languages": [
388 /// {
389 /// "language": "English",
390 /// "isDisabled": false
391 /// }
392 /// ]
393 /// }
394 /// ```
395 pub languages: Vec<LanguageEntry>,
396
397 /// ## Example JSON data
398 ///
399 /// ```json
400 /// {
401 /// "genderInfo": {
402 /// "gender": "unknown",
403 /// "genderOverride": "Borg"
404 /// }
405 /// }
406 /// ```
407 pub gender_info: GenderInfo,
408}
409
410/// ## Example
411///
412/// ```
413/// use twitter_archive::structs::personalization::LanguageEntry;
414///
415/// let json = r#"{
416/// "language": "English",
417/// "isDisabled": false
418/// }"#;
419///
420/// let data: LanguageEntry = serde_json::from_str(&json).unwrap();
421///
422/// // De-serialized properties
423/// assert_eq!(data.language, "English");
424/// assert_eq!(data.is_disabled, false);
425///
426/// // Re-serialize is equivalent to original data
427/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
428/// ```
429#[derive(Deserialize, Serialize, Debug, Clone, Display)]
430#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
431#[serde(rename_all = "camelCase")]
432pub struct LanguageEntry {
433 /// ## Example JSON data
434 ///
435 /// ```json
436 /// { "language": "English" }
437 /// ```
438 pub language: String,
439
440 /// ## Example JSON data
441 ///
442 /// ```json
443 /// { "isDisabled": false }
444 /// ```
445 pub is_disabled: bool,
446}
447
448/// ## Example
449///
450/// ```
451/// use twitter_archive::structs::personalization::GenderInfo;
452///
453/// let json = r#"{
454/// "gender": "unknown",
455/// "genderOverride": "Borg"
456/// }"#;
457///
458/// let data: GenderInfo = serde_json::from_str(&json).unwrap();
459///
460/// // De-serialized properties
461/// assert_eq!(data.gender, "unknown");
462/// assert_eq!(data.gender_override, "Borg");
463///
464/// // Re-serialize is equivalent to original data
465/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
466/// ```
467#[derive(Deserialize, Serialize, Debug, Clone, Display)]
468#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
469#[serde(rename_all = "camelCase")]
470pub struct GenderInfo {
471 /// ## Example JSON data
472 ///
473 /// ```json
474 /// { "gender": "unknown" }
475 /// ```
476 pub gender: String,
477
478 /// ## Example JSON data
479 ///
480 /// ```json
481 /// { "genderOverride": "Borg" }
482 /// ```
483 pub gender_override: String,
484}
485
486/// ## Example
487///
488/// ```
489/// use twitter_archive::structs::personalization::Interests;
490///
491/// let json = r##"{
492/// "interests": [
493/// {
494/// "name": "#HappyFriday",
495/// "isDisabled": false
496/// }
497/// ],
498/// "partnerInterests": [],
499/// "audienceAndAdvertisers": {
500/// "lookalikeAdvertisers": [
501/// "@EXAMPLE_ONE",
502/// "@EXAMPLE_TWO"
503/// ],
504/// "advertisers": [],
505/// "doNotReachAdvertisers": [],
506/// "catalogAudienceAdvertisers": [],
507/// "numAudiences": "0"
508/// },
509/// "shows": [
510/// "1899",
511/// "DuckTales"
512/// ]
513/// }"##;
514///
515/// let data: Interests = serde_json::from_str(&json).unwrap();
516///
517/// // De-serialized properties
518/// assert_eq!(data.interests[0].name, "#HappyFriday");
519/// assert_eq!(data.interests[0].is_disabled, false);
520/// assert_eq!(data.partner_interests.len(), 0);
521/// assert_eq!(data.audience_and_advertisers.lookalike_advertisers.len(), 2);
522/// assert_eq!(data.audience_and_advertisers.advertisers.len(), 0);
523/// assert_eq!(data.audience_and_advertisers.do_not_reach_advertisers.len(), 0);
524/// assert_eq!(data.audience_and_advertisers.catalog_audience_advertisers.len(), 0);
525/// assert_eq!(data.audience_and_advertisers.num_audiences, 0);
526/// assert_eq!(data.shows[0], "1899");
527/// assert_eq!(data.shows[1], "DuckTales");
528///
529/// // Re-serialize is equivalent to original data
530/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
531/// ```
532#[derive(Deserialize, Serialize, Debug, Clone, Display)]
533#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
534#[serde(rename_all = "camelCase")]
535pub struct Interests {
536 /// ## Example JSON data
537 ///
538 /// ```json
539 /// {
540 /// "interests": [
541 /// {
542 /// "name": "#HappyFriday",
543 /// "isDisabled": false
544 /// }
545 /// ]
546 /// }
547 /// ```
548 pub interests: Vec<Interest>,
549
550 /// ## Example JSON data
551 ///
552 /// ```json
553 /// { "partnerInterests": [] }
554 /// ```
555 pub partner_interests: Vec<String>,
556
557 /// ## Example JSON data
558 ///
559 /// ```json
560 /// {
561 /// "audienceAndAdvertisers": {
562 /// "lookalikeAdvertisers": [
563 /// "@EXAMPLE_ONE",
564 /// "@EXAMPLE_TWO"
565 /// ],
566 /// "advertisers": [],
567 /// "doNotReachAdvertisers": [],
568 /// "catalogAudienceAdvertisers": [],
569 /// "numAudiences": "0"
570 /// }
571 /// }
572 /// ```
573 pub audience_and_advertisers: AudienceAndAdvertisers,
574
575 /// ## Example JSON data
576 ///
577 /// ```json
578 /// {
579 /// "shows": [
580 /// "1899",
581 /// "DuckTales"
582 /// ]
583 /// }
584 /// ```
585 pub shows: Vec<String>,
586}
587
588/// ## Example
589///
590/// ```
591/// use twitter_archive::structs::personalization::Interest;
592///
593/// let json = r##"{
594/// "name": "#HappyFriday",
595/// "isDisabled": false
596/// }"##;
597///
598/// let data: Interest = serde_json::from_str(&json).unwrap();
599///
600/// // De-serialized properties
601/// assert_eq!(data.name, "#HappyFriday");
602/// assert_eq!(data.is_disabled, false);
603///
604/// // Re-serialize is equivalent to original data
605/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
606/// ```
607#[derive(Deserialize, Serialize, Debug, Clone, Display)]
608#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
609#[serde(rename_all = "camelCase")]
610pub struct Interest {
611 /// ## Example JSON data
612 ///
613 /// ```json
614 /// { "name": "#HappyFriday" }
615 /// ```
616 pub name: String,
617
618 /// ## Example JSON data
619 ///
620 /// ```json
621 /// { "isDisabled": false }
622 /// ```
623 pub is_disabled: bool,
624}
625
626/// ## Example
627///
628/// ```
629/// use twitter_archive::structs::personalization::AudienceAndAdvertisers;
630///
631/// let json = r#"{
632/// "lookalikeAdvertisers": [
633/// "@EXAMPLE_ONE",
634/// "@EXAMPLE_TWO"
635/// ],
636/// "advertisers": [],
637/// "doNotReachAdvertisers": [],
638/// "catalogAudienceAdvertisers": [],
639/// "numAudiences": "0"
640/// }"#;
641///
642/// let data: AudienceAndAdvertisers = serde_json::from_str(&json).unwrap();
643///
644/// // De-serialized properties
645/// assert_eq!(data.lookalike_advertisers[0], "@EXAMPLE_ONE");
646/// assert_eq!(data.lookalike_advertisers[1], "@EXAMPLE_TWO");
647/// assert_eq!(data.advertisers.len(), 0);
648/// assert_eq!(data.do_not_reach_advertisers.len(), 0);
649/// assert_eq!(data.catalog_audience_advertisers.len(), 0);
650/// assert_eq!(data.num_audiences, 0);
651///
652/// // Re-serialize is equivalent to original data
653/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
654/// ```
655#[derive(Deserialize, Serialize, Debug, Clone, Display)]
656#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
657#[serde(rename_all = "camelCase")]
658pub struct AudienceAndAdvertisers {
659 /// ## Example JSON data
660 ///
661 /// ```json
662 /// {
663 /// "lookalikeAdvertisers": [
664 /// "@EXAMPLE_ONE",
665 /// "@EXAMPLE_TWO"
666 /// ]
667 /// }
668 /// ```
669 pub lookalike_advertisers: Vec<String>,
670
671 /// ## Example JSON data
672 ///
673 /// ```json
674 /// { "advertisers": [] }
675 /// ```
676 pub advertisers: Vec<String>,
677
678 /// ## Example JSON data
679 ///
680 /// ```json
681 /// { "doNotReachAdvertisers": [] }
682 /// ```
683 pub do_not_reach_advertisers: Vec<String>,
684
685 /// ## Example JSON data
686 ///
687 /// ```json
688 /// { "catalogAudienceAdvertisers": [] }
689 /// ```
690 pub catalog_audience_advertisers: Vec<String>,
691
692 /// ## Example JSON data
693 ///
694 /// ```json
695 /// { "numAudiences": "0" }
696 /// ```
697 #[serde(with = "convert::number_like_string")]
698 pub num_audiences: usize,
699}
700
701// TODO: find examples from which data structure(s) may be defined
702// /// ## Example
703// ///
704// /// ```
705// /// use twitter_archive::structs::personalization::LocationHistory;
706// ///
707// /// let json = r#"{ }"#;
708// ///
709// /// let data: LocationHistory = serde_json::from_str(&json).unwrap();
710// ///
711// /// // De-serialized properties
712// /// // assert_eq!(data., "");
713// ///
714// /// // Re-serialize is equivalent to original data without pretty printing
715// /// assert_eq!(serde_json::to_string(&data).unwrap(), json.replace("\n", "").replace(" ", ""));
716// /// ```
717// #[derive(Deserialize, Serialize, Debug, Clone, Display)]
718// #[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
719// #[serde(rename_all = "camelCase")]
720// pub struct LocationHistory {
721// todo!();
722// }
723
724/// ## Example
725///
726/// ```
727/// use twitter_archive::structs::personalization::InferredAgeInfo;
728///
729/// let json = r#"{
730/// "age": [
731/// "13-99"
732/// ],
733/// "birthDate": ""
734/// }"#;
735///
736/// let data: InferredAgeInfo = serde_json::from_str(&json).unwrap();
737///
738/// // De-serialized properties
739/// assert_eq!(data.age[0], "13-99");
740/// assert_eq!(data.birth_date, "");
741///
742/// // Re-serialize is equivalent to original data
743/// assert_eq!(serde_json::to_string_pretty(&data).unwrap(), json);
744/// ```
745#[derive(Deserialize, Serialize, Debug, Clone, Display)]
746#[display(fmt = "{}", "serde_json::to_value(self).unwrap()")]
747#[serde(rename_all = "camelCase")]
748pub struct InferredAgeInfo {
749 /// ## Example JSON data
750 ///
751 /// ```json
752 /// {
753 /// "age": [
754 /// "13-99"
755 /// ]
756 /// }
757 /// ```
758 pub age: Vec<String>,
759
760 /// ## Example JSON data
761 ///
762 /// ```json
763 /// { "birthDate": "" }
764 /// ```
765 pub birth_date: String,
766}