yt_transcript_rs/
microformat_extractor.rs

1use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
2use crate::models::{MicroformatData, MicroformatEmbed, MicroformatThumbnail, VideoThumbnail};
3
/// # MicroformatExtractor
///
/// Stateless namespace type for pulling the `microformat` section out of a
/// YouTube player response.
///
/// The type carries no data; its functionality is exposed through associated
/// functions. The microformat section holds supplementary video metadata such
/// as the list of available countries, the category, and embed information.
pub struct MicroformatExtractor;
11
12impl MicroformatExtractor {
13    /// Extracts microformat data from the player response JSON.
14    ///
15    /// # Parameters
16    ///
17    /// * `player_response` - The parsed YouTube player response JSON object
18    /// * `video_id` - The YouTube video ID (used for error reporting)
19    ///
20    /// # Returns
21    ///
22    /// * `Result<MicroformatData, CouldNotRetrieveTranscript>` - The parsed microformat data or an error
23    ///
24    /// # Errors
25    ///
26    /// This method will return an error if:
27    /// - The microformat data is missing from the player response
28    /// - The JSON structure does not match the expected format
29    pub fn extract_microformat_data(
30        player_response: &serde_json::Value,
31        video_id: &str,
32    ) -> Result<MicroformatData, CouldNotRetrieveTranscript> {
33        let renderer = match player_response.get("microformat") {
34            Some(microformat) => match microformat.get("playerMicroformatRenderer") {
35                Some(renderer) => renderer,
36                None => {
37                    return Err(CouldNotRetrieveTranscript {
38                        video_id: video_id.to_string(),
39                        reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
40                    });
41                }
42            },
43            None => {
44                return Err(CouldNotRetrieveTranscript {
45                    video_id: video_id.to_string(),
46                    reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
47                });
48            }
49        };
50
51        // Manual extraction of fields to handle YouTube's nested format
52        let mut microformat_data = MicroformatData {
53            available_countries: None,
54            category: None,
55            description: None,
56            embed: None,
57            external_channel_id: None,
58            external_video_id: None,
59            has_ypc_metadata: None,
60            is_family_safe: None,
61            is_shorts_eligible: None,
62            is_unlisted: None,
63            length_seconds: None,
64            like_count: None,
65            owner_channel_name: None,
66            owner_profile_url: None,
67            publish_date: None,
68            thumbnail: None,
69            title: None,
70            upload_date: None,
71            view_count: None,
72        };
73
74        // Extract simple string fields
75        if let Some(value) = renderer.get("externalVideoId") {
76            if let Some(s) = value.as_str() {
77                microformat_data.external_video_id = Some(s.to_string());
78            }
79        }
80
81        if let Some(value) = renderer.get("externalChannelId") {
82            if let Some(s) = value.as_str() {
83                microformat_data.external_channel_id = Some(s.to_string());
84            }
85        }
86
87        if let Some(value) = renderer.get("ownerChannelName") {
88            if let Some(s) = value.as_str() {
89                microformat_data.owner_channel_name = Some(s.to_string());
90            }
91        }
92
93        if let Some(value) = renderer.get("ownerProfileUrl") {
94            if let Some(s) = value.as_str() {
95                microformat_data.owner_profile_url = Some(s.to_string());
96            }
97        }
98
99        if let Some(value) = renderer.get("category") {
100            if let Some(s) = value.as_str() {
101                microformat_data.category = Some(s.to_string());
102            }
103        }
104
105        if let Some(value) = renderer.get("lengthSeconds") {
106            if let Some(s) = value.as_str() {
107                microformat_data.length_seconds = Some(s.to_string());
108            }
109        }
110
111        if let Some(value) = renderer.get("viewCount") {
112            if let Some(s) = value.as_str() {
113                microformat_data.view_count = Some(s.to_string());
114            }
115        }
116
117        if let Some(value) = renderer.get("likeCount") {
118            if let Some(s) = value.as_str() {
119                microformat_data.like_count = Some(s.to_string());
120            }
121        }
122
123        if let Some(value) = renderer.get("uploadDate") {
124            if let Some(s) = value.as_str() {
125                microformat_data.upload_date = Some(s.to_string());
126            }
127        }
128
129        if let Some(value) = renderer.get("publishDate") {
130            if let Some(s) = value.as_str() {
131                microformat_data.publish_date = Some(s.to_string());
132            }
133        }
134
135        // Extract boolean fields
136        if let Some(value) = renderer.get("isFamilySafe") {
137            if let Some(b) = value.as_bool() {
138                microformat_data.is_family_safe = Some(b);
139            }
140        }
141
142        if let Some(value) = renderer.get("isUnlisted") {
143            if let Some(b) = value.as_bool() {
144                microformat_data.is_unlisted = Some(b);
145            }
146        }
147
148        if let Some(value) = renderer.get("isShortsEligible") {
149            if let Some(b) = value.as_bool() {
150                microformat_data.is_shorts_eligible = Some(b);
151            }
152        }
153
154        if let Some(value) = renderer.get("hasYpcMetadata") {
155            if let Some(b) = value.as_bool() {
156                microformat_data.has_ypc_metadata = Some(b);
157            }
158        }
159
160        // Extract nested fields
161        // Title (which is in simpleText format)
162        if let Some(title) = renderer.get("title") {
163            if let Some(simple_text) = title.get("simpleText") {
164                if let Some(text) = simple_text.as_str() {
165                    microformat_data.title = Some(text.to_string());
166                }
167            }
168        }
169
170        // Description (which is in simpleText format)
171        if let Some(description) = renderer.get("description") {
172            if let Some(simple_text) = description.get("simpleText") {
173                if let Some(text) = simple_text.as_str() {
174                    microformat_data.description = Some(text.to_string());
175                }
176            }
177        }
178
179        // Available countries (which is an array)
180        if let Some(countries) = renderer.get("availableCountries") {
181            if let Some(countries_array) = countries.as_array() {
182                let mut country_list = Vec::new();
183                for country in countries_array {
184                    if let Some(country_code) = country.as_str() {
185                        country_list.push(country_code.to_string());
186                    }
187                }
188                if !country_list.is_empty() {
189                    microformat_data.available_countries = Some(country_list);
190                }
191            }
192        }
193
194        // Embed information
195        if let Some(embed_obj) = renderer.get("embed") {
196            let mut embed = MicroformatEmbed {
197                height: None,
198                iframe_url: None,
199                width: None,
200            };
201
202            if let Some(height) = embed_obj.get("height") {
203                if let Some(h) = height.as_i64() {
204                    embed.height = Some(h as i32);
205                }
206            }
207
208            if let Some(width) = embed_obj.get("width") {
209                if let Some(w) = width.as_i64() {
210                    embed.width = Some(w as i32);
211                }
212            }
213
214            if let Some(url) = embed_obj.get("iframeUrl") {
215                if let Some(u) = url.as_str() {
216                    embed.iframe_url = Some(u.to_string());
217                }
218            }
219
220            microformat_data.embed = Some(embed);
221        }
222
223        // Thumbnail information
224        if let Some(thumbnail_obj) = renderer.get("thumbnail") {
225            if let Some(thumbnails) = thumbnail_obj.get("thumbnails") {
226                if let Some(thumbnail_array) = thumbnails.as_array() {
227                    let mut thumb_list = Vec::new();
228
229                    for thumb in thumbnail_array {
230                        let mut video_thumb = VideoThumbnail {
231                            url: String::new(),
232                            width: 0,
233                            height: 0,
234                        };
235
236                        if let Some(url) = thumb.get("url") {
237                            if let Some(u) = url.as_str() {
238                                video_thumb.url = u.to_string();
239                            }
240                        }
241
242                        if let Some(width) = thumb.get("width") {
243                            if let Some(w) = width.as_i64() {
244                                video_thumb.width = w as u32;
245                            }
246                        }
247
248                        if let Some(height) = thumb.get("height") {
249                            if let Some(h) = height.as_i64() {
250                                video_thumb.height = h as u32;
251                            }
252                        }
253
254                        // Only add if we have a valid URL
255                        if !video_thumb.url.is_empty() {
256                            thumb_list.push(video_thumb);
257                        }
258                    }
259
260                    if !thumb_list.is_empty() {
261                        microformat_data.thumbnail = Some(MicroformatThumbnail {
262                            thumbnails: Some(thumb_list),
263                        });
264                    }
265                }
266            }
267        }
268
269        Ok(microformat_data)
270    }
271}