yt_transcript_rs/
captions_extractor.rs

1use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
2
/// # CaptionsExtractor
///
/// Stateless helper for locating captions/transcript data inside YouTube's
/// player response JSON.
///
/// The type has no fields and is never instantiated with data; it only
/// serves as a namespace for the extraction routine
/// `CaptionsExtractor::extract_captions_data`.
pub struct CaptionsExtractor;
10
11impl CaptionsExtractor {
12    /// Extracts captions data from the player response JSON.
13    ///
14    /// # Parameters
15    ///
16    /// * `player_response` - The parsed YouTube player response JSON object
17    /// * `video_id` - The YouTube video ID (used for error reporting)
18    ///
19    /// # Returns
20    ///
21    /// * `Result<serde_json::Value, CouldNotRetrieveTranscript>` - The captions JSON data or an error
22    ///
23    /// # Errors
24    ///
25    /// This method will return a specific error if:
26    /// - Transcripts are disabled for the video
27    /// - The captions data is missing or in an unexpected format
28    pub fn extract_captions_data(
29        player_response: &serde_json::Value,
30        video_id: &str,
31    ) -> Result<serde_json::Value, CouldNotRetrieveTranscript> {
32        // Extract captions from player response
33        match player_response.get("captions") {
34            Some(captions) => match captions.get("playerCaptionsTracklistRenderer") {
35                Some(renderer) => Ok(renderer.clone()),
36                None => Err(CouldNotRetrieveTranscript {
37                    video_id: video_id.to_string(),
38                    reason: Some(CouldNotRetrieveTranscriptReason::TranscriptsDisabled),
39                }),
40            },
41            None => Err(CouldNotRetrieveTranscript {
42                video_id: video_id.to_string(),
43                reason: Some(CouldNotRetrieveTranscriptReason::TranscriptsDisabled),
44            }),
45        }
46    }
47}
48
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Video ID passed to every extraction call; it only surfaces in errors.
    const VIDEO_ID: &str = "test_video_id";

    /// Builds one caption-track fixture entry for the given language.
    fn caption_track(
        base_url: &str,
        display_name: &str,
        language_code: &str,
    ) -> serde_json::Value {
        json!({
            "baseUrl": base_url,
            "name": { "simpleText": display_name },
            "vssId": format!(".{}", language_code),
            "languageCode": language_code,
            "isTranslatable": true
        })
    }

    /// Collects the `languageCode` of each entry in the array stored under `key`.
    fn language_codes<'a>(data: &'a serde_json::Value, key: &str) -> Vec<&'a str> {
        data[key]
            .as_array()
            .expect("fixture key should hold an array")
            .iter()
            .map(|entry| {
                entry["languageCode"]
                    .as_str()
                    .expect("languageCode should be a string")
            })
            .collect()
    }

    /// Asserts that extraction failed with `TranscriptsDisabled` for `VIDEO_ID`.
    fn assert_transcripts_disabled(
        result: Result<serde_json::Value, CouldNotRetrieveTranscript>,
    ) {
        let error = result.expect_err("extraction should fail when captions data is absent");
        assert_eq!(error.video_id, VIDEO_ID);
        assert!(matches!(
            error.reason,
            Some(CouldNotRetrieveTranscriptReason::TranscriptsDisabled)
        ));
    }

    #[test]
    fn test_extract_captions_data_success() {
        // A response whose renderer holds a single English caption track.
        let mock_renderer = json!({
            "captionTracks": [
                caption_track("https://example.com/captions", "English", "en")
            ]
        });
        let player_response = json!({
            "captions": {
                "playerCaptionsTracklistRenderer": mock_renderer
            }
        });

        let extracted_data = CaptionsExtractor::extract_captions_data(&player_response, VIDEO_ID)
            .expect("renderer is present, extraction should succeed");

        // The renderer must come back unchanged.
        assert_eq!(extracted_data, mock_renderer);

        // Spot-check the content of the extracted data.
        assert!(extracted_data.get("captionTracks").is_some());
        assert_eq!(language_codes(&extracted_data, "captionTracks"), ["en"]);
    }

    #[test]
    fn test_extract_captions_data_missing_captions() {
        // The player response carries video details but no `captions` field.
        let player_response = json!({
            "videoDetails": {
                "videoId": VIDEO_ID,
                "title": "Test Video"
            }
        });

        assert_transcripts_disabled(CaptionsExtractor::extract_captions_data(
            &player_response,
            VIDEO_ID,
        ));
    }

    #[test]
    fn test_extract_captions_data_missing_renderer() {
        // `captions` exists but lacks `playerCaptionsTracklistRenderer`.
        let player_response = json!({
            "captions": {
                "otherField": "value"
            }
        });

        assert_transcripts_disabled(CaptionsExtractor::extract_captions_data(
            &player_response,
            VIDEO_ID,
        ));
    }

    #[test]
    fn test_extract_captions_data_empty_renderer() {
        // The renderer key is present but maps to an empty object.
        let player_response = json!({
            "captions": {
                "playerCaptionsTracklistRenderer": {}
            }
        });

        let extracted_data = CaptionsExtractor::extract_captions_data(&player_response, VIDEO_ID)
            .expect("an empty renderer is still a successful extraction");

        // Should succeed and hand back the empty object untouched.
        assert!(extracted_data.is_object());
        assert!(extracted_data
            .as_object()
            .expect("renderer should be a JSON object")
            .is_empty());
    }

    #[test]
    fn test_extract_captions_data_complex_structure() {
        // A richer renderer resembling real YouTube responses: two caption
        // tracks, one audio track, and two translation languages.
        let player_response = json!({
            "captions": {
                "playerCaptionsTracklistRenderer": {
                    "captionTracks": [
                        caption_track("https://example.com/captions/en", "English", "en"),
                        caption_track("https://example.com/captions/fr", "French", "fr")
                    ],
                    "audioTracks": [
                        {
                            "captionTrackIndices": [0, 1],
                            "defaultCaptionTrackIndex": 0
                        }
                    ],
                    "translationLanguages": [
                        {
                            "languageCode": "es",
                            "languageName": { "simpleText": "Spanish" }
                        },
                        {
                            "languageCode": "de",
                            "languageName": { "simpleText": "German" }
                        }
                    ]
                }
            }
        });

        let extracted_data = CaptionsExtractor::extract_captions_data(&player_response, VIDEO_ID)
            .expect("renderer is present, extraction should succeed");

        // Caption tracks keep their count and order.
        assert!(extracted_data.get("captionTracks").is_some());
        assert_eq!(
            language_codes(&extracted_data, "captionTracks"),
            ["en", "fr"]
        );

        // Translation languages keep their count and order.
        assert_eq!(
            language_codes(&extracted_data, "translationLanguages"),
            ["es", "de"]
        );
    }
}