use std::fmt::Write as _;
use std::path::Path;
use std::time::Duration;
use anyhow::{Context, Result};
use tokio::process::Command;
use tracing::{info, warn};
use crate::analyze::{TranscribeOptions, TranscriptionResult, default_backend};
/// Hosts whose URLs are treated as media pages.
///
/// A URL matches when its host equals an entry or is a subdomain of one
/// (for example `www.youtube.com` or `open.spotify.com`).
const MEDIA_HOSTS: &[&str] = &[
    "youtube.com",
    "youtu.be",
    "vimeo.com",
    "soundcloud.com",
    "spotify.com",
    "podcasts.apple.com",
    "anchor.fm",
    "overcast.fm",
    "twitch.tv",
    "dailymotion.com",
    "rumble.com",
    "bitchute.com",
];

/// Lower-case file extensions (leading dot included) that mark a direct media link.
const MEDIA_EXTENSIONS: &[&str] = &[
    ".mp3", ".mp4", ".m4a", ".wav", ".ogg", ".flac", ".webm", ".opus", ".aac", ".wma", ".avi",
    ".mkv", ".mov",
];

/// Returns `true` when `url` points at a known media host or at a file with a
/// media extension.
///
/// Matching is case-insensitive. The query string and fragment are ignored, so
/// a media host that only appears inside a query parameter does not count, and
/// `episode.mp3#t=30` is still recognised as a media file.
///
/// Inputs without a scheme are accepted: the first path segment is tried as a
/// host (`youtube.com/watch`) and the whole input is checked for a media
/// extension (`episode.mp3`).
#[must_use]
pub fn is_media_url(url: &str) -> bool {
    let lower = url.trim().to_ascii_lowercase();

    // Neither the query string nor the fragment takes part in matching.
    let cut = lower
        .find(|c: char| matches!(c, '?' | '#'))
        .unwrap_or(lower.len());
    let location = &lower[..cut];

    // Strip `scheme://` (or a protocol-relative `//`). The scheme is validated so
    // that a `://` appearing later in a path is not mistaken for one.
    let (has_authority, rest) = match location.split_once("://") {
        Some((scheme, rest))
            if !scheme.is_empty()
                && scheme
                    .chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.')) =>
        {
            (true, rest)
        }
        _ => match location.strip_prefix("//") {
            Some(rest) => (true, rest),
            None => (false, location),
        },
    };
    let (authority, path) = rest.split_once('/').unwrap_or((rest, ""));

    // Reduce `user:pass@host:port.` to the bare host name.
    let host = authority.rsplit('@').next().unwrap_or(authority);
    let host = host.split(':').next().unwrap_or(host).trim_end_matches('.');

    // Exact host or subdomain only; `notyoutube.com` must not match `youtube.com`.
    let on_media_host = MEDIA_HOSTS.iter().any(|known| {
        host.strip_suffix(*known)
            .map_or(false, |prefix| prefix.is_empty() || prefix.ends_with('.'))
    });
    if on_media_host {
        return true;
    }

    // With an explicit authority only the path can carry a file extension, which
    // keeps TLDs such as `.mov` from being read as one. Without it the input may
    // be a bare file name, so the whole location is checked.
    let extension_source = if has_authority { path } else { location };
    MEDIA_EXTENSIONS
        .iter()
        .any(|ext| extension_source.ends_with(*ext))
}
/// Descriptive details about a media item, used for the Markdown header.
///
/// The optional fields are `None` when the yt-dlp metadata probe failed or
/// printed an empty or `NA` value for that field.
#[derive(Debug, Clone)]
pub struct MediaMetadata {
/// Title of the media item, if known.
pub title: Option<String>,
/// Name of the uploader or channel, if known.
pub uploader: Option<String>,
/// Duration as a preformatted string (yt-dlp's `duration_string`), if known.
pub duration_string: Option<String>,
/// The URL the metadata was requested for.
pub url: String,
}
/// Everything produced by one `fetch_media_as_markdown` call.
pub struct MediaFetchResult {
/// Transcript rendered as Markdown by `format_transcript_markdown`.
pub markdown: String,
/// Metadata gathered for the source URL.
pub metadata: MediaMetadata,
/// Transcription result returned by the ASR backend.
pub transcription: TranscriptionResult,
}
/// Downloads the audio behind `url`, transcribes it and renders the transcript
/// as Markdown.
///
/// * `language` - optional language hint forwarded to the ASR backend.
/// * `diarize` - forwarded to the backend's `diarize` option.
///
/// Metadata extraction is best-effort: when it fails the result carries empty
/// metadata and the URL is used as the heading.
///
/// # Errors
///
/// Returns an error when no ASR backend is available, when the temporary
/// directory cannot be created, when the download or WAV conversion fails, or
/// when the backend fails to transcribe the audio.
pub async fn fetch_media_as_markdown(
    url: &str,
    language: Option<&str>,
    diarize: bool,
) -> Result<MediaFetchResult> {
    // Check the backend first: without one, the metadata probe and the whole
    // download would be wasted work.
    let backend = default_backend();
    if !backend.is_available() {
        anyhow::bail!(
            "No ASR backend available. Run `nab models fetch fluidaudio` first \
(macOS Apple Silicon) or `nab models fetch sherpa-onnx` (all platforms)."
        );
    }

    let metadata = extract_metadata(url).await;

    // `temp_dir` stays bound until the function returns so the WAV file still
    // exists while the backend reads it.
    let temp_dir = tempfile::tempdir().context("create temp dir")?;
    let wav_path = temp_dir.path().join("audio.wav");
    download_audio(url, &wav_path)
        .await
        .context("audio download via yt-dlp/ffmpeg")?;

    let opts = TranscribeOptions {
        language: language.map(String::from),
        word_timestamps: true,
        diarize,
        ..Default::default()
    };
    info!(url, backend = backend.name(), "transcribing media");
    let result = backend
        .transcribe(&wav_path, opts)
        .await
        .context("transcription failed")?;

    let markdown = format_transcript_markdown(&metadata, &result);
    Ok(MediaFetchResult {
        markdown,
        metadata,
        transcription: result,
    })
}
/// Asks `yt-dlp` for the title, uploader and duration of `url` without
/// downloading the media.
///
/// This is best-effort: if `yt-dlp` cannot be spawned or exits unsuccessfully,
/// a warning is logged and the returned metadata has only `url` set. Fields
/// that `yt-dlp` prints as empty or `NA` are returned as `None`.
async fn extract_metadata(url: &str) -> MediaMetadata {
    // Fallback used on every failure path.
    let empty = || MediaMetadata {
        title: None,
        uploader: None,
        duration_string: None,
        url: url.to_owned(),
    };

    let output = Command::new("yt-dlp")
        .args([
            "--no-playlist",
            "--skip-download",
            "--print",
            // One field per output line, in the order they are read back below.
            "%(title)s\n%(uploader)s\n%(duration_string)s",
            // End of options: a URL starting with `-` must not be parsed as a flag.
            "--",
            url,
        ])
        .stdin(std::process::Stdio::null())
        .output()
        .await;
    let stdout = match output {
        Ok(o) if o.status.success() => String::from_utf8_lossy(&o.stdout).into_owned(),
        Ok(o) => {
            warn!(
                "yt-dlp metadata failed: {}",
                String::from_utf8_lossy(&o.stderr).trim()
            );
            return empty();
        }
        Err(e) => {
            warn!("yt-dlp not found or spawn failed: {e}");
            return empty();
        }
    };

    // Each call consumes the next output line; blank and `NA` values count as missing.
    let mut lines = stdout.lines();
    let mut next_field = || {
        lines
            .next()
            .map(str::trim)
            .filter(|s| !s.is_empty() && *s != "NA")
            .map(String::from)
    };
    let title = next_field();
    let uploader = next_field();
    let duration_string = next_field();

    MediaMetadata {
        title,
        uploader,
        duration_string,
        url: url.to_owned(),
    }
}
async fn download_audio(url: &str, wav_path: &Path) -> Result<()> {
let temp_base = wav_path
.parent()
.context("wav_path has no parent directory")?;
let temp_audio = temp_base.join("audio_raw.%(ext)s");
let common_args: &[&str] = &[
"--no-playlist",
"-f",
"bestaudio",
"--cookies-from-browser",
"brave",
"-o",
temp_audio.to_str().context("non-UTF-8 temp path")?,
url,
];
let ytdlp_status = run_ytdlp("yt-dlp", common_args).await;
if ytdlp_status.is_err() {
info!("system yt-dlp failed, retrying via uvx");
let mut uvx_args = vec!["--from", "yt-dlp[default]", "yt-dlp"];
uvx_args.extend(common_args);
run_ytdlp("uvx", &uvx_args)
.await
.context("yt-dlp via uvx also failed")?;
}
let downloaded = find_downloaded_audio(temp_base)
.context("no audio file produced by yt-dlp — check URL and yt-dlp installation")?;
convert_to_wav(&downloaded, wav_path).await
}
/// Runs `binary` with `args` and waits for it to finish.
///
/// The child gets a null stdin so it can never read from or block on the
/// parent's input. Its stdout is discarded, and its stderr is captured only to
/// explain a failure.
///
/// # Errors
///
/// Fails when the process cannot be spawned or awaited, or when it exits
/// unsuccessfully. In the latter case the error carries the exit status and,
/// when available, the last non-empty line the process wrote to stderr.
async fn run_ytdlp(binary: &str, args: &[&str]) -> Result<()> {
    let finished = Command::new(binary)
        .args(args)
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .with_context(|| format!("spawn {binary}"))?
        .wait_with_output()
        .await
        .with_context(|| format!("wait for {binary}"))?;

    let status = finished.status;
    if status.success() {
        return Ok(());
    }

    // Report only the last non-empty stderr line, where the error is expected.
    let stderr = String::from_utf8_lossy(&finished.stderr);
    match stderr.lines().rev().map(str::trim).find(|l| !l.is_empty()) {
        Some(reason) => Err(anyhow::anyhow!("{binary} exited with {status}: {reason}")),
        None => Err(anyhow::anyhow!("{binary} exited with {status}")),
    }
}
/// Finds the finished raw download (`audio_raw.<ext>`) inside `dir`.
///
/// Skipped entries: the converted `audio.wav`, a file still carrying the
/// unexpanded `%(ext)s` template, and incomplete-download leftovers (`*.part`,
/// `*.part-Frag…`, `*.ytdl`) that an earlier failed attempt may have left
/// behind. Returns `None` when the directory cannot be read or holds no
/// matching regular file.
fn find_downloaded_audio(dir: &Path) -> Option<std::path::PathBuf> {
    std::fs::read_dir(dir).ok()?.find_map(|entry| {
        let path = entry.ok()?.path();
        let name = path.file_name()?.to_str()?;

        let is_template = name.contains("audio_raw.%(ext)s");
        // A retry can succeed with a different container, leaving these
        // residue files next to the real download.
        let is_partial =
            name.ends_with(".part") || name.contains(".part-") || name.ends_with(".ytdl");

        let is_download = name != "audio.wav"
            && !is_template
            && !is_partial
            && name.starts_with("audio_raw.")
            && path.is_file();
        is_download.then_some(path)
    })
}
/// Converts `input` to a 16 kHz, mono, signed 16-bit PCM WAV file at `output`
/// using `ffmpeg`.
///
/// ffmpeg runs non-interactively: `-nostdin` plus a null stdin keep it from
/// reading the parent's input. An existing `output` is overwritten.
///
/// # Errors
///
/// Fails when either path is not valid UTF-8, when `ffmpeg` cannot be spawned
/// or awaited, or when it exits unsuccessfully. In the latter case the error
/// carries the exit status and, when available, the last non-empty line of
/// ffmpeg's stderr.
async fn convert_to_wav(input: &Path, output: &Path) -> Result<()> {
    let input_arg = input.to_str().context("non-UTF-8 input path")?;
    let output_arg = output.to_str().context("non-UTF-8 output path")?;

    let finished = Command::new("ffmpeg")
        .args([
            // Global options go first, before any input or output file.
            "-nostdin",
            "-y",
            "-hide_banner",
            // Only errors, so the captured stderr is short and to the point.
            "-loglevel",
            "error",
            "-i",
            input_arg,
            // Audio only: ignore any video or cover-art stream in the input.
            "-vn",
            "-ar",
            "16000",
            "-ac",
            "1",
            "-c:a",
            "pcm_s16le",
            output_arg,
        ])
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::piped())
        .spawn()
        .context("spawn ffmpeg")?
        .wait_with_output()
        .await
        .context("wait for ffmpeg")?;

    let status = finished.status;
    if status.success() {
        return Ok(());
    }

    let stderr = String::from_utf8_lossy(&finished.stderr);
    match stderr.lines().rev().map(str::trim).find(|l| !l.is_empty()) {
        Some(reason) => Err(anyhow::anyhow!(
            "ffmpeg conversion failed: {status}: {reason}"
        )),
        None => Err(anyhow::anyhow!("ffmpeg conversion failed: {status}")),
    }
}
/// Renders a transcription as a Markdown document.
///
/// The document starts with a `#` heading (the title, or the URL when no title
/// is known), followed by source, uploader, duration, model and language
/// lines, then a `## Transcript` section with one timestamped paragraph per
/// non-empty segment. Segments with a speaker label include it next to the
/// timestamp. Trailing whitespace is removed from the result.
#[must_use]
pub fn format_transcript_markdown(
    metadata: &MediaMetadata,
    result: &TranscriptionResult,
) -> String {
    let mut md = String::with_capacity(result.segments.len() * 120 + 512);

    // Header block. Writing into a `String` cannot fail, so results are ignored.
    let heading = match metadata.title.as_deref() {
        Some(title) => title,
        None => metadata.url.as_str(),
    };
    let _ = writeln!(md, "# {heading}\n");
    let _ = writeln!(md, "**Source**: {}", metadata.url);
    if let Some(uploader) = metadata.uploader.as_ref() {
        let _ = writeln!(md, "**Uploader**: {uploader}");
    }
    if let Some(duration) = metadata.duration_string.as_ref() {
        let _ = writeln!(md, "**Duration**: {duration}");
    }
    let _ = writeln!(
        md,
        "**Model**: {} | **RTFx**: {:.0}×",
        result.model, result.rtfx
    );
    let _ = writeln!(md, "**Language**: {}", result.language);
    md.push_str("\n---\n\n## Transcript\n\n");

    // Transcript body: skip segments that are blank after trimming.
    let spoken = result
        .segments
        .iter()
        .map(|seg| (seg, seg.text.trim()))
        .filter(|(_, text)| !text.is_empty());
    for (seg, text) in spoken {
        let timestamp = format_seconds(seg.start);
        if let Some(speaker) = seg.speaker.as_deref() {
            let _ = writeln!(md, "**[{timestamp}] {speaker}** {text}\n");
        } else {
            let _ = writeln!(md, "**[{timestamp}]** {text}\n");
        }
    }

    // Drop the trailing blank lines left by the last paragraph.
    let kept = md.trim_end().len();
    md.truncate(kept);
    md
}
/// Formats a position in seconds as `M:SS`, or `H:MM:SS` from one hour up.
///
/// Fractions of a second are dropped. Negative, NaN and unrepresentable
/// (for example infinite) inputs are rendered as `0:00`.
fn format_seconds(seconds: f64) -> String {
    let whole_secs = match Duration::try_from_secs_f64(seconds.max(0.0)) {
        Ok(duration) => duration.as_secs(),
        Err(_) => 0,
    };

    let (hours, within_hour) = (whole_secs / 3600, whole_secs % 3600);
    let (minutes, secs) = (within_hour / 60, within_hour % 60);

    if hours == 0 {
        return format!("{minutes}:{secs:02}");
    }
    format!("{hours}:{minutes:02}:{secs:02}")
}
// Unit tests for the pure parts of this module: URL classification, Markdown
// rendering and timestamp formatting. Nothing here spawns yt-dlp or ffmpeg.
#[cfg(test)]
mod tests {
use crate::analyze::asr_backend::TranscriptSegment;
use super::*;
// Builds a transcription result with fixed model/language values around `segments`.
fn make_result(segments: Vec<TranscriptSegment>) -> TranscriptionResult {
TranscriptionResult {
segments,
language: "en".to_string(),
duration_seconds: 120.0,
model: "parakeet-tdt-0.6b-v3".to_string(),
backend: "fluidaudio".to_string(),
rtfx: 131.0,
processing_time_seconds: 0.92,
speakers: None,
footnotes: None,
active_reading: None,
}
}
// Builds metadata with a fixed uploader and duration; only title and URL vary.
fn make_metadata(title: Option<&str>, url: &str) -> MediaMetadata {
MediaMetadata {
title: title.map(String::from),
uploader: Some("Test Channel".to_string()),
duration_string: Some("2:00".to_string()),
url: url.to_string(),
}
}
// --- is_media_url: known hosts ---
#[test]
fn is_media_url_youtube_watch() {
assert!(is_media_url("https://www.youtube.com/watch?v=Cn8HBj8QAbk"));
}
#[test]
fn is_media_url_youtu_be_shortlink() {
assert!(is_media_url("https://youtu.be/Cn8HBj8QAbk"));
}
#[test]
fn is_media_url_vimeo() {
assert!(is_media_url("https://vimeo.com/123456789"));
}
#[test]
fn is_media_url_soundcloud() {
assert!(is_media_url("https://soundcloud.com/artist/track"));
}
// --- is_media_url: direct file links ---
#[test]
fn is_media_url_direct_mp3() {
assert!(is_media_url("https://example.com/podcast/episode.mp3"));
}
#[test]
fn is_media_url_direct_mp4() {
assert!(is_media_url("https://cdn.example.com/video.mp4"));
}
// The query string must not hide the file extension.
#[test]
fn is_media_url_direct_mp3_with_query_params() {
assert!(is_media_url(
"https://example.com/video.mp4?token=abc&expires=9999"
));
}
// --- is_media_url: non-media URLs ---
#[test]
fn is_media_url_html_page_returns_false() {
assert!(!is_media_url("https://example.com/article"));
}
#[test]
fn is_media_url_pdf_returns_false() {
assert!(!is_media_url("https://example.com/paper.pdf"));
}
// --- format_transcript_markdown ---
// Title, source, uploader and duration all appear in the header.
#[test]
fn format_transcript_markdown_includes_header_with_title() {
let meta = make_metadata(Some("My Podcast Episode"), "https://example.com/ep1.mp3");
let result = make_result(vec![]);
let md = format_transcript_markdown(&meta, &result);
assert!(md.contains("# My Podcast Episode"), "got:\n{md}");
assert!(
md.contains("**Source**: https://example.com/ep1.mp3"),
"got:\n{md}"
);
assert!(md.contains("**Uploader**: Test Channel"), "got:\n{md}");
assert!(md.contains("**Duration**: 2:00"), "got:\n{md}");
}
// Without a title the URL is used as the heading.
#[test]
fn format_transcript_markdown_uses_url_when_no_title() {
let meta = MediaMetadata {
title: None,
uploader: None,
duration_string: None,
url: "https://example.com/audio.mp3".to_string(),
};
let result = make_result(vec![]);
let md = format_transcript_markdown(&meta, &result);
assert!(md.contains("# https://example.com/audio.mp3"), "got:\n{md}");
}
// Each segment is prefixed with its start time.
#[test]
fn format_transcript_markdown_includes_timestamps() {
let segments = vec![
TranscriptSegment {
text: "Hello world.".to_string(),
start: 0.0,
end: 2.5,
confidence: 0.98,
language: None,
speaker: None,
words: None,
},
TranscriptSegment {
text: "Second segment.".to_string(),
start: 15.0,
end: 18.0,
confidence: 0.96,
language: None,
speaker: None,
words: None,
},
];
let meta = make_metadata(Some("Test"), "https://example.com/video.mp4");
let result = make_result(segments);
let md = format_transcript_markdown(&meta, &result);
assert!(md.contains("**[0:00]** Hello world."), "got:\n{md}");
assert!(md.contains("**[0:15]** Second segment."), "got:\n{md}");
}
// A segment with a speaker label shows it next to the timestamp; the 0.5 s
// start is rendered as 0:00 because fractions are dropped.
#[test]
fn format_transcript_markdown_includes_speaker_labels_when_diarized() {
let segments = vec![TranscriptSegment {
text: "Welcome to the show.".to_string(),
start: 0.5,
end: 3.0,
confidence: 0.99,
language: None,
speaker: Some("SPEAKER_00".to_string()),
words: None,
}];
let meta = make_metadata(Some("Interview"), "https://youtube.com/watch?v=abc");
let result = make_result(segments);
let md = format_transcript_markdown(&meta, &result);
assert!(
md.contains("**[0:00] SPEAKER_00** Welcome to the show."),
"got:\n{md}"
);
}
// --- format_seconds ---
// Fractional seconds are truncated, not rounded.
#[test]
fn format_seconds_minutes_only() {
assert_eq!(format_seconds(75.9), "1:15");
}
#[test]
fn format_seconds_hours() {
assert_eq!(format_seconds(3661.0), "1:01:01");
}
#[test]
fn format_seconds_zero() {
assert_eq!(format_seconds(0.0), "0:00");
}
}