use crate::cli::{Cli, ProviderChoice};
use crate::error::{AppError, AppResult};
use crate::parse::video_id::extract_video_id;
use crate::provider::{Format, ProviderChain};
use crate::text::normalize_nfc;
use serde::Serialize;
use std::process::ExitCode;
pub mod batch;
pub mod extract;
/// JSON payload written to stdout by `output_success` when `cli.json` is set.
#[derive(Debug, Serialize)]
struct JsonSuccess {
/// Provider name supplied by the caller of `output_success`.
provider: &'static str,
/// Video identifier, copied from the `video_id` argument.
video_id: String,
/// Language code produced by `language_to_str(cli.lang)`.
language: String,
/// Output format name produced by `format_to_str(cli.format)`.
format: String,
/// Transcript content after it has been passed through `normalize_nfc`.
content: String,
/// Size value passed through from the caller (presumably bytes, per the
/// name; confirm at the call site).
byte_size: u64,
/// Duration passed through from the caller (presumably milliseconds, per
/// the name; confirm at the call site).
duration_ms: u64,
/// Source URL passed through from the caller.
source_url: String,
}
/// JSON payload written to stdout by `output_error` when `cli.json` is set.
#[derive(Debug, Serialize)]
struct JsonError {
/// Set to `true` by `output_error`.
error: bool,
/// Value of `exit_code()` for the reported error.
code: u8,
/// Text of the reported error, obtained via `to_string()`.
message: String,
}
/// JSON payload written to stdout by `output_dry_run` when `cli.json` is set.
#[derive(Debug, Serialize)]
struct JsonDryRun {
/// Event label chosen by `output_dry_run`.
event: &'static str,
/// Video identifier, copied from the `video_id` argument.
video_id: String,
/// Language code produced by `language_to_str(cli.lang)`.
language: String,
/// Output format name produced by `format_to_str(cli.format)`.
format: String,
/// Copied from the `would_fetch` argument; the text-mode output labels
/// `true` as a cache miss and `false` as a cache hit.
would_fetch: bool,
}
/// Entry point of the command: validates the CLI arguments, builds the
/// provider chain and dispatches to batch or single-URL extraction.
///
/// # Errors
/// Propagates the error from `cli.validate()` as well as whatever the
/// selected sub-command (`batch::run` or `extract::run`) returns.
#[tracing::instrument(level = "debug", err, skip(cli), fields(batch = cli.batch, url = ?cli.url, json = cli.json, verbose = cli.verbose, provider = ?cli.provider))]
pub async fn run(cli: Cli) -> AppResult<ExitCode> {
    cli.validate()?;
    let providers = build_provider_chain(&cli);

    // Single-URL extraction is handled first; everything else is batch mode.
    if !cli.batch {
        return extract::run(&cli, &providers).await;
    }
    batch::run(&cli, &providers).await
}
/// Builds the `ProviderChain` used for this invocation.
///
/// The chain holds a single `ProviderNoteey`, configured with the language
/// derived from `cli.lang`. `cli.provider` is resolved (defaulting to
/// `ProviderChoice::Auto`) but is not consulted when assembling the chain.
#[tracing::instrument(level = "debug", skip(cli), fields(provider = ?cli.provider))]
fn build_provider_chain(cli: &Cli) -> ProviderChain {
    // Resolved but not consulted: only the noteey provider is registered below.
    let _selected = cli.provider.unwrap_or(ProviderChoice::Auto);

    let lang = language_to_str(cli.lang);
    let noteey = crate::provider::provider_noteey::ProviderNoteey::new().with_language(lang);

    let providers: Vec<Box<dyn crate::provider::Provider>> = vec![Box::new(noteey)];
    ProviderChain::new(providers)
}
pub fn format_to_provider_format(arg: crate::cli::FormatArg) -> Format {
match arg {
crate::cli::FormatArg::Txt => Format::Txt,
crate::cli::FormatArg::Srt => Format::Srt,
}
}
pub fn language_to_str(arg: crate::cli::LanguageArg) -> &'static str {
match arg {
crate::cli::LanguageArg::En => "en",
crate::cli::LanguageArg::Pt => "pt",
crate::cli::LanguageArg::Es => "es",
crate::cli::LanguageArg::Fr => "fr",
crate::cli::LanguageArg::De => "de",
crate::cli::LanguageArg::It => "it",
}
}
pub fn convert_format(
content: &[u8],
format: Format,
format_hint: crate::provider::SubtitleFormat,
) -> AppResult<String> {
match (format, format_hint) {
(Format::Srt, crate::provider::SubtitleFormat::Srt) => {
String::from_utf8(content.to_vec())
.map_err(|e| AppError::Internal(format!("srt is not valid utf-8: {e}")))
}
(Format::Txt, crate::provider::SubtitleFormat::Srt) => {
let srt_text = String::from_utf8(content.to_vec())
.map_err(|e| AppError::Internal(format!("srt is not valid utf-8: {e}")))?;
crate::parse::srt_to_text(&srt_text)
}
(Format::Txt, crate::provider::SubtitleFormat::NoteeyTranscript) => {
let raw = String::from_utf8(content.to_vec())
.map_err(|e| AppError::Internal(format!("noteey body not valid utf-8: {e}")))?;
crate::parse::noteey_to_text(&raw)
}
(Format::Srt, crate::provider::SubtitleFormat::NoteeyTranscript) => Err(
AppError::InvalidUsage(
"--format srt is not available when the only source is noteey.com \
(transcript has no SRT framing); use --format txt (default)"
.to_string(),
),
),
}
}
/// Writes a successfully fetched transcript to stdout.
///
/// `content` is first passed through `normalize_nfc`. When `cli.json` is set,
/// a single `JsonSuccess` object is serialized and written; otherwise the
/// normalized text itself is written.
///
/// # Errors
/// Returns `AppError::Serde` if JSON serialization fails and propagates any
/// error from `crate::io::write_subtitle_to_stdout`.
pub async fn output_success(
    cli: &Cli,
    provider: &'static str,
    video_id: &str,
    content: &str,
    source_url: &str,
    byte_size: u64,
    duration_ms: u64,
) -> AppResult<()> {
    let normalized = normalize_nfc(content);

    // Plain mode: emit the normalized transcript and stop.
    if !cli.json {
        crate::io::write_subtitle_to_stdout(normalized.as_bytes()).await?;
        return Ok(());
    }

    let payload = JsonSuccess {
        provider,
        video_id: video_id.to_string(),
        language: language_to_str(cli.lang).to_string(),
        format: format_to_str(cli.format).to_string(),
        content: normalized,
        byte_size,
        duration_ms,
        source_url: source_url.to_string(),
    };
    let serialized = serde_json::to_string(&payload).map_err(AppError::Serde)?;
    crate::io::write_subtitle_to_stdout(serialized.as_bytes()).await?;
    Ok(())
}
/// Reports `err` on stdout as a `JsonError` object when `cli.json` is set.
///
/// Outside JSON mode this function writes nothing. Reporting is best-effort:
/// serialization and write failures are ignored, so the function always
/// returns `Ok(())`.
pub async fn output_error(cli: &Cli, err: &AppError) -> AppResult<()> {
    if !cli.json {
        return Ok(());
    }

    let report = JsonError {
        error: true,
        code: err.exit_code(),
        message: err.to_string(),
    };
    if let Ok(serialized) = serde_json::to_string(&report) {
        // Deliberately ignored: a failure to report an error must not mask it.
        let _ = crate::io::write_subtitle_to_stdout(serialized.as_bytes()).await;
    }
    Ok(())
}
pub async fn output_dry_run(cli: &Cli, video_id: &str, would_fetch: bool) -> AppResult<()> {
if cli.json {
let payload = JsonDryRun {
event: "dry_run_cache_miss",
video_id: video_id.to_string(),
language: language_to_str(cli.lang).to_string(),
format: format_to_str(cli.format).to_string(),
would_fetch,
};
if let Ok(json) = serde_json::to_string(&payload) {
crate::io::write_subtitle_to_stdout(json.as_bytes()).await?;
}
} else if would_fetch {
crate::io::write_subtitle_to_stdout(format!("dry_run_cache_miss {video_id}\n").as_bytes())
.await?;
} else {
crate::io::write_subtitle_to_stdout(format!("dry_run_cache_hit {video_id}\n").as_bytes())
.await?;
}
Ok(())
}
fn format_to_str(arg: crate::cli::FormatArg) -> &'static str {
match arg {
crate::cli::FormatArg::Txt => "txt",
crate::cli::FormatArg::Srt => "srt",
}
}
/// Resolves the URL to process for a single extraction.
///
/// Uses `cli.url` when present; otherwise reads the URL from stdin via
/// `crate::io::read_url_from_stdin`.
///
/// # Errors
/// Returns `AppError::InvalidUsage` when no URL was given and `cli.batch` is
/// set, and propagates any error from reading stdin.
pub async fn extract_url_from_input(cli: &Cli) -> AppResult<String> {
    match &cli.url {
        Some(url) => Ok(url.clone()),
        None if cli.batch => Err(AppError::InvalidUsage(
            "extract cannot be called with --batch".to_string(),
        )),
        None => crate::io::read_url_from_stdin().await,
    }
}
/// Extracts the video id from `url` via `extract_video_id`.
///
/// When `cli.verbose` is set and `cli.quiet` is not, a `video_id_extracted`
/// event is logged to the `events` tracing target.
///
/// # Errors
/// Propagates the error returned by `extract_video_id`.
pub fn parse_video_id_from_url(cli: &Cli, url: &str) -> AppResult<String> {
    let video_id = extract_video_id(url)?;

    let log_event = cli.verbose && !cli.quiet;
    if log_event {
        tracing::info!(target: "events", event = "video_id_extracted", video_id = %video_id);
    }
    Ok(video_id)
}