//! Core data types for the `narrate_this` pipeline (narrate_this/types.rs).

use serde::{Deserialize, Serialize};

/// Configurable style variables for narration prompt templates.
///
/// These values are interpolated into the default narration and search-narration
/// prompts. To override a prompt entirely, set `narration_prompt` or
/// `search_narration_prompt` on [`super::providers::firecrawl::FirecrawlConfig`].
///
/// All fields have chainable setters and sensible defaults via [`Default`].
#[derive(Debug, Clone)]
pub struct NarrationStyle {
    /// Writer role, e.g. "news broadcast scriptwriter", "podcast host"
    pub role: String,
    /// Reader persona, e.g. "a news anchor", "a podcast host"
    pub persona: String,
    /// Output length, e.g. "2-4 paragraphs, 30-90 seconds when read aloud"
    pub length: String,
    /// Tone description, e.g. "Conversational and engaging"
    pub tone: String,
    /// Structural guidance, e.g. "Start with the key headline/finding, then provide context"
    pub structure: String,
}

22impl Default for NarrationStyle {
23    fn default() -> Self {
24        Self {
25            role: "news broadcast scriptwriter".into(),
26            persona: "a news anchor".into(),
27            length: "2-4 paragraphs, 30-90 seconds when read aloud".into(),
28            tone: "Conversational and engaging".into(),
29            structure: "Start with the key headline/finding, then provide context".into(),
30        }
31    }
32}
33
34impl NarrationStyle {
35    pub fn role(mut self, role: impl Into<String>) -> Self {
36        self.role = role.into();
37        self
38    }
39
40    pub fn persona(mut self, persona: impl Into<String>) -> Self {
41        self.persona = persona.into();
42        self
43    }
44
45    pub fn length(mut self, length: impl Into<String>) -> Self {
46        self.length = length.into();
47        self
48    }
49
50    pub fn tone(mut self, tone: impl Into<String>) -> Self {
51        self.tone = tone.into();
52        self
53    }
54
55    pub fn structure(mut self, structure: impl Into<String>) -> Self {
56        self.structure = structure.into();
57        self
58    }
59}
60
/// A single word-level caption with timing information.
///
/// Produced by TTS providers that support alignment data (e.g. ElevenLabs).
/// Used for subtitle rendering and media segment timing. Serializable so
/// caption tracks can be persisted or exchanged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CaptionSegment {
    /// The word or token text.
    pub text: String,
    /// Start time in milliseconds from the beginning of the audio.
    pub start_ms: u64,
    /// Duration in milliseconds.
    pub duration_ms: u64,
}

/// The source of a media asset — a URL, local file path, or raw bytes.
///
/// Serialized with an adjacent tag: `{ "type": "...", "value": ... }`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum MediaSource {
    /// A remote URL (http/https).
    Url(String),
    /// A local file path.
    FilePath(String),
    /// Raw bytes (e.g. an in-memory image).
    Bytes(Vec<u8>),
}

87impl MediaSource {
88    /// Returns a short display string for logging, truncating URLs beyond 80 characters.
89    pub fn display_short(&self) -> String {
90        match self {
91            MediaSource::Url(u) => {
92                if u.len() > 80 {
93                    let truncated: String = u.chars().take(80).collect();
94                    format!("{truncated}…")
95                } else {
96                    u.clone()
97                }
98            }
99            MediaSource::FilePath(p) => p.clone(),
100            MediaSource::Bytes(b) => format!("<{} bytes>", b.len()),
101        }
102    }
103}
104
105impl From<&str> for MediaSource {
106    fn from(s: &str) -> Self {
107        if s.starts_with("http://") || s.starts_with("https://") {
108            MediaSource::Url(s.to_string())
109        } else {
110            MediaSource::FilePath(s.to_string())
111        }
112    }
113}
114
115impl From<String> for MediaSource {
116    fn from(s: String) -> Self {
117        if s.starts_with("http://") || s.starts_with("https://") {
118            MediaSource::Url(s)
119        } else {
120            MediaSource::FilePath(s)
121        }
122    }
123}
124
125impl From<Vec<u8>> for MediaSource {
126    fn from(data: Vec<u8>) -> Self {
127        MediaSource::Bytes(data)
128    }
129}
130
/// The type of media asset (image or video).
///
/// Serialized in `snake_case` (`"image"` / `"video"`). Marked
/// `#[non_exhaustive]`, so downstream crates must match with a wildcard arm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "snake_case")]
#[non_exhaustive]
pub enum MediaKind {
    /// A still image (the default).
    #[default]
    Image,
    /// A video clip.
    Video,
}

/// A media segment tied to a time range in the narration audio.
///
/// Each segment maps a media asset (image or video) to a portion of the timeline.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaSegment {
    /// Source of the media asset.
    pub source: MediaSource,
    /// Start time in milliseconds.
    ///
    /// NOTE(review): timing here is `f64`, unlike [`CaptionSegment`]'s `u64` —
    /// presumably to carry fractional-millisecond chunk boundaries; confirm
    /// against the media planner.
    pub start_ms: f64,
    /// End time in milliseconds.
    pub end_ms: f64,
    /// Whether this is an image or video. Defaults to image when absent
    /// during deserialization (`#[serde(default)]`).
    #[serde(default)]
    pub kind: MediaKind,
}

/// Output from a TTS synthesis call.
#[derive(Debug, Clone)]
pub struct TtsResult {
    /// Raw audio bytes (typically MP3).
    pub audio: Vec<u8>,
    /// Word-level caption segments with timing. May be empty if the provider
    /// does not supply alignment data.
    pub captions: Vec<CaptionSegment>,
}

/// Output from keyword extraction.
#[derive(Debug, Clone)]
pub struct KeywordResult {
    /// Extracted search keywords for media lookup.
    pub keywords: Vec<String>,
}

/// Complete pipeline output returned by [`ContentPipeline::process`](crate::ContentPipeline::process).
#[derive(Debug, Clone)]
pub struct ContentOutput {
    /// The narration text (after any text transforms).
    pub narration: String,
    /// Raw audio bytes (MP3).
    pub audio: Vec<u8>,
    /// Word-level captions with timing data.
    pub captions: Vec<CaptionSegment>,
    /// Visual media segments matched to the narration timeline.
    pub media_segments: Vec<MediaSegment>,
    /// Path where audio was stored, if an [`AudioStorage`](crate::AudioStorage) was configured.
    pub audio_path: Option<String>,
    /// Path to the rendered video, if a [`VideoRenderer`](crate::VideoRenderer) was configured.
    pub video_path: Option<String>,
}

/// A background audio track to mix with the narration audio.
///
/// Tracks loop by default and play at 30% volume. Use the builder methods
/// to customize.
///
/// # Example
///
/// ```
/// use narrate_this::AudioTrack;
///
/// let track = AudioTrack::new("./music.mp3")
///     .volume(0.15)
///     .start_at(2000) // delay by 2 seconds
///     .no_loop();
/// ```
#[derive(Debug, Clone)]
pub struct AudioTrack {
    /// File path to the audio file.
    pub path: String,
    /// Volume level from 0.0 (silent) to 1.0 (full). Default: 0.3.
    pub volume: f32,
    /// Optional delay before the track starts (milliseconds).
    pub start_ms: Option<u64>,
    /// Optional end time — the track is trimmed at this point (milliseconds).
    pub end_ms: Option<u64>,
    /// Whether to loop the track for the duration of the narration. Default: `true`.
    pub loop_track: bool,
}

219impl AudioTrack {
220    pub fn new(path: impl Into<String>) -> Self {
221        Self {
222            path: path.into(),
223            volume: 0.3,
224            start_ms: None,
225            end_ms: None,
226            loop_track: true,
227        }
228    }
229
230    pub fn volume(mut self, volume: f32) -> Self {
231        self.volume = volume.clamp(0.0, 1.0);
232        self
233    }
234
235    pub fn start_at(mut self, ms: u64) -> Self {
236        self.start_ms = Some(ms);
237        self
238    }
239
240    pub fn end_at(mut self, ms: u64) -> Self {
241        self.end_ms = Some(ms);
242        self
243    }
244
245    pub fn no_loop(mut self) -> Self {
246        self.loop_track = false;
247        self
248    }
249}
250
/// Input source for content creation.
///
/// Marked `#[non_exhaustive]`: new source kinds may be added, so downstream
/// matches need a wildcard arm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum ContentSource {
    /// Raw text to narrate directly.
    Text(String),
    /// Article URL to scrape and narrate.
    ArticleUrl {
        /// The article URL to scrape.
        url: String,
        /// Optional known title — NOTE(review): presumably used to skip or
        /// seed title extraction; confirm against the scraper.
        title: Option<String>,
    },
    /// Search query — scraper searches and narrates results.
    SearchQuery(String),
}

/// A user-provided media asset with a description for AI-based media planning.
///
/// # Example
///
/// ```
/// use narrate_this::MediaAsset;
///
/// let assets = vec![
///     MediaAsset::image("./hero.jpg", "A rocket launching into space"),
///     MediaAsset::video("https://example.com/demo.mp4", "App demo walkthrough"),
///     MediaAsset::image_bytes(vec![/* png bytes */], "Dashboard screenshot"),
/// ];
/// ```
#[derive(Debug, Clone)]
pub struct MediaAsset {
    /// The media source (URL, file path, or bytes).
    pub source: MediaSource,
    /// A text description of what this asset depicts, used by the media planner
    /// to match assets to narration chunks.
    pub description: String,
    /// Whether this is an image or video.
    pub kind: MediaKind,
}

287impl MediaAsset {
288    /// Create an image asset from a URL or file path.
289    pub fn image(source: impl Into<MediaSource>, description: impl Into<String>) -> Self {
290        Self {
291            source: source.into(),
292            description: description.into(),
293            kind: MediaKind::Image,
294        }
295    }
296
297    /// Create a video asset from a URL or file path.
298    pub fn video(source: impl Into<MediaSource>, description: impl Into<String>) -> Self {
299        Self {
300            source: source.into(),
301            description: description.into(),
302            kind: MediaKind::Video,
303        }
304    }
305
306    /// Create an image asset from raw bytes.
307    pub fn image_bytes(data: Vec<u8>, description: impl Into<String>) -> Self {
308        Self {
309            source: MediaSource::Bytes(data),
310            description: description.into(),
311            kind: MediaKind::Image,
312        }
313    }
314
315    /// Create a video asset from raw bytes.
316    pub fn video_bytes(data: Vec<u8>, description: impl Into<String>) -> Self {
317        Self {
318            source: MediaSource::Bytes(data),
319            description: description.into(),
320            kind: MediaKind::Video,
321        }
322    }
323}
324
/// What to do when the media planner can't match a user asset to a narration chunk.
///
/// Marked `#[non_exhaustive]`: new fallback strategies may be added later.
#[derive(Debug, Clone, Copy, Default)]
#[non_exhaustive]
pub enum MediaFallback {
    /// Fall back to keyword extraction + stock media search
    /// (requires `.stock_search()` to be configured on the planner).
    #[default]
    StockSearch,
    /// Skip the chunk — no media for that time range.
    Skip,
}

/// A narration chunk with timing information, used by media planners.
#[derive(Debug, Clone)]
pub struct TimedChunk {
    /// The text content of this chunk.
    pub text: String,
    /// Start time in milliseconds.
    pub start_ms: f64,
    /// End time in milliseconds.
    pub end_ms: f64,
}

/// Progress events during pipeline execution.
///
/// Marked `#[non_exhaustive]`: new stages may emit new events, so consumers
/// must match with a wildcard arm.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub enum PipelineProgress {
    /// Narration text generation has begun.
    NarrationStarted,
    /// Narration finished; carries the length of the generated text.
    NarrationComplete { narration_len: usize },
    /// An optional text transform step has begun.
    TextTransformStarted,
    /// Text transform finished; carries the transformed text's length.
    TextTransformComplete { narration_len: usize },
    /// TTS synthesis has begun.
    TtsStarted,
    /// TTS finished; carries audio size and number of caption segments.
    TtsComplete { audio_bytes: usize, caption_count: usize },
    /// Media search has begun over `chunk_count` narration chunks.
    MediaSearchStarted { chunk_count: usize },
    /// A media segment was found for the chunk at `index`.
    MediaSegmentFound { index: usize, kind: MediaKind },
    /// Media search finished with `segment_count` segments.
    MediaSearchComplete { segment_count: usize },
    /// Audio storage has begun.
    AudioStorageStarted,
    /// Audio was stored at `path`.
    AudioStored { path: String },
    /// Video rendering has begun.
    RenderStarted,
    /// Rendering finished; the video is at `path`.
    RenderComplete { path: String },
}