yt_transcript_rs/
streaming_data_extractor.rs

1use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
2use crate::models::{ColorInfo, Range, StreamingData, StreamingFormat};
3use serde_json::Value;
4
/// Extracts streaming data information from YouTube's player response data.
///
/// The streaming data describes the available video and audio formats,
/// including URLs, quality options, bitrates, and codec details.
///
/// This is a stateless unit struct: it carries no data and only serves as a
/// namespace for its associated functions.
#[derive(Debug, Clone, Copy, Default)]
pub struct StreamingDataExtractor;
12
13impl StreamingDataExtractor {
14    /// Extracts streaming data from the player response JSON.
15    ///
16    /// # Parameters
17    ///
18    /// * `player_response` - The parsed YouTube player response JSON object
19    /// * `video_id` - The YouTube video ID (used for error reporting)
20    ///
21    /// # Returns
22    ///
23    /// * `Result<StreamingData, CouldNotRetrieveTranscript>` - The parsed streaming data or an error
24    ///
25    /// # Errors
26    ///
27    /// This method will return an error if:
28    /// - The streaming data is missing from the player response
29    /// - The JSON structure does not match the expected format
30    pub fn extract_streaming_data(
31        player_response: &Value,
32        video_id: &str,
33    ) -> Result<StreamingData, CouldNotRetrieveTranscript> {
34        let streaming_data = match player_response.get("streamingData") {
35            Some(data) => data,
36            None => {
37                return Err(CouldNotRetrieveTranscript {
38                    video_id: video_id.to_string(),
39                    reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
40                });
41            }
42        };
43
44        // Extract expires_in_seconds
45        let expires_in_seconds = match streaming_data.get("expiresInSeconds") {
46            Some(Value::String(s)) => s.clone(),
47            _ => "0".to_string(), // Default to 0 if not found
48        };
49
50        // Extract formats
51        let formats = Self::extract_formats(streaming_data.get("formats"));
52
53        // Extract adaptive formats
54        let adaptive_formats = Self::extract_formats(streaming_data.get("adaptiveFormats"));
55
56        // Extract server ABR streaming URL
57        let server_abr_streaming_url = match streaming_data.get("serverAbrStreamingUrl") {
58            Some(Value::String(s)) => Some(s.clone()),
59            _ => None,
60        };
61
62        Ok(StreamingData {
63            expires_in_seconds,
64            formats,
65            adaptive_formats,
66            server_abr_streaming_url,
67        })
68    }
69
70    /// Extracts formats from a JSON array
71    ///
72    /// # Parameters
73    ///
74    /// * `formats_value` - Optional JSON value containing an array of format objects
75    ///
76    /// # Returns
77    ///
78    /// * `Vec<StreamingFormat>` - Vector of parsed streaming formats
79    fn extract_formats(formats_value: Option<&Value>) -> Vec<StreamingFormat> {
80        let mut formats = Vec::new();
81
82        if let Some(Value::Array(array)) = formats_value {
83            for item in array {
84                if let Some(format) = Self::parse_format(item) {
85                    formats.push(format);
86                }
87            }
88        }
89
90        formats
91    }
92
93    /// Parses a single format from JSON
94    ///
95    /// # Parameters
96    ///
97    /// * `format_json` - JSON object containing format data
98    ///
99    /// # Returns
100    ///
101    /// * `Option<StreamingFormat>` - Parsed format or None if parsing failed
102    fn parse_format(format_json: &Value) -> Option<StreamingFormat> {
103        // Required fields
104        let itag = format_json.get("itag")?.as_u64()? as u32;
105        let mime_type = format_json.get("mimeType")?.as_str()?.to_string();
106        let bitrate = format_json.get("bitrate")?.as_u64()?;
107        let quality = format_json.get("quality")?.as_str()?.to_string();
108        let projection_type = format_json.get("projectionType")?.as_str()?.to_string();
109        let approx_duration_ms = format_json.get("approxDurationMs")?.as_str()?.to_string();
110
111        // Optional fields
112        let url = format_json
113            .get("url")
114            .and_then(|v| v.as_str())
115            .map(|s| s.to_string());
116
117        let width = format_json
118            .get("width")
119            .and_then(|v| v.as_u64())
120            .map(|v| v as u32);
121        let height = format_json
122            .get("height")
123            .and_then(|v| v.as_u64())
124            .map(|v| v as u32);
125
126        let fps = format_json
127            .get("fps")
128            .and_then(|v| v.as_u64())
129            .map(|v| v as u32);
130
131        let quality_label = format_json
132            .get("qualityLabel")
133            .and_then(|v| v.as_str())
134            .map(|s| s.to_string());
135
136        let average_bitrate = format_json.get("averageBitrate").and_then(|v| v.as_u64());
137
138        let audio_quality = format_json
139            .get("audioQuality")
140            .and_then(|v| v.as_str())
141            .map(|s| s.to_string());
142
143        let audio_sample_rate = format_json
144            .get("audioSampleRate")
145            .and_then(|v| v.as_str())
146            .map(|s| s.to_string());
147
148        let audio_channels = format_json
149            .get("audioChannels")
150            .and_then(|v| v.as_u64())
151            .map(|v| v as u32);
152
153        let quality_ordinal = format_json
154            .get("qualityOrdinal")
155            .and_then(|v| v.as_str())
156            .map(|s| s.to_string());
157
158        let high_replication = format_json.get("highReplication").and_then(|v| v.as_bool());
159
160        let last_modified = format_json
161            .get("lastModified")
162            .and_then(|v| v.as_str())
163            .map(|s| s.to_string());
164
165        let content_length = format_json
166            .get("contentLength")
167            .and_then(|v| v.as_str())
168            .map(|s| s.to_string());
169
170        let loudness_db = format_json.get("loudnessDb").and_then(|v| v.as_f64());
171
172        let is_drc = format_json.get("isDrc").and_then(|v| v.as_bool());
173
174        let xtags = format_json
175            .get("xtags")
176            .and_then(|v| v.as_str())
177            .map(|s| s.to_string());
178
179        // Extract init_range if present
180        let init_range = format_json.get("initRange").and_then(|range| {
181            let start = range.get("start")?.as_str()?.to_string();
182            let end = range.get("end")?.as_str()?.to_string();
183            Some(Range { start, end })
184        });
185
186        // Extract index_range if present
187        let index_range = format_json.get("indexRange").and_then(|range| {
188            let start = range.get("start")?.as_str()?.to_string();
189            let end = range.get("end")?.as_str()?.to_string();
190            Some(Range { start, end })
191        });
192
193        // Extract color_info if present
194        let color_info = format_json.get("colorInfo").map(|color| {
195            let primaries = color
196                .get("primaries")
197                .and_then(|v| v.as_str())
198                .map(|s| s.to_string());
199            let transfer_characteristics = color
200                .get("transferCharacteristics")
201                .and_then(|v| v.as_str())
202                .map(|s| s.to_string());
203            let matrix_coefficients = color
204                .get("matrixCoefficients")
205                .and_then(|v| v.as_str())
206                .map(|s| s.to_string());
207
208            ColorInfo {
209                primaries,
210                transfer_characteristics,
211                matrix_coefficients,
212            }
213        });
214
215        Some(StreamingFormat {
216            itag,
217            url,
218            mime_type,
219            bitrate,
220            width,
221            height,
222            init_range,
223            index_range,
224            last_modified,
225            content_length,
226            quality,
227            fps,
228            quality_label,
229            projection_type,
230            average_bitrate,
231            audio_quality,
232            approx_duration_ms,
233            audio_sample_rate,
234            audio_channels,
235            quality_ordinal,
236            high_replication,
237            color_info,
238            loudness_db,
239            is_drc,
240            xtags,
241        })
242    }
243}