minutes-core 0.18.8

use crate::config::Config;
use crate::error::{MinutesError, Result, TranscribeError};
use crate::pipeline::{clean_transcript_line, normalize_space};
use crate::{markdown::ContentType, transcribe};
use chrono::Utc;
use serde::{Deserialize, Serialize};
use std::fs;
use std::path::{Path, PathBuf};
#[cfg(target_os = "macos")]
use std::time::Duration;
use std::time::Instant;

#[cfg(target_os = "macos")]
use crate::calendar::output_with_timeout;
#[cfg(target_os = "macos")]
use std::process::Command;

// Swift source of the Apple Speech helper, embedded into the binary at compile time.
#[cfg(target_os = "macos")]
const HELPER_SOURCE: &str = include_str!("../resources/apple-speech-helper.swift");
// 30-second timeout. NOTE(review): presumably bounds short helper invocations
// (build/probe) — confirm at the use sites.
#[cfg(target_os = "macos")]
const HELPER_TIMEOUT: Duration = Duration::from_secs(30);
// 900-second (15-minute) timeout. NOTE(review): presumably bounds a single helper
// transcription run — confirm at the use site.
#[cfg(target_os = "macos")]
const HELPER_TRANSCRIBE_TIMEOUT: Duration = Duration::from_secs(900);

/// Capability snapshot for a single Apple Speech module.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechModuleCapability {
    /// Identifier of the module (e.g. `"speech-transcriber"`, `"dictation-transcriber"`).
    pub module_id: String,
    /// Whether the module is available; `None` when availability is unknown
    /// (the non-macOS fallback report always uses `None`).
    pub is_available: Option<bool>,
    /// Free-form asset status string; the non-macOS fallback reports `"unsupported"`.
    pub asset_status: String,
    /// Locales reported as supported by the module.
    pub supported_locales: Vec<String>,
    /// Locales reported as installed for the module.
    pub installed_locales: Vec<String>,
}

/// Capability report covering both Apple Speech modules.
///
/// Serialized with camelCase field names. On non-macOS targets
/// `probe_capabilities` synthesizes this report with everything marked unsupported.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechCapabilityReport {
    /// Payload discriminator; the non-macOS fallback uses `"capabilities"`.
    pub kind: String,
    /// Version of the report schema; the non-macOS fallback uses `1`.
    pub schema_version: u32,
    /// OS version string; the non-macOS fallback stores `std::env::consts::OS` here.
    pub os_version: String,
    /// Whether the Apple Speech runtime is supported; `false` in the non-macOS fallback.
    pub runtime_supported: bool,
    /// NOTE(review): presumably signals that probing did not modify any assets — confirm
    /// against the helper. The non-macOS fallback sets it to `true`.
    pub read_only: bool,
    /// Capability of the speech transcriber module.
    pub speech_transcriber: AppleSpeechModuleCapability,
    /// Capability of the dictation transcriber module.
    pub dictation_transcriber: AppleSpeechModuleCapability,
    /// Free-form notes attached to the report.
    pub notes: Vec<String>,
}

/// One segment of an Apple Speech transcript.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechTranscriptSegment {
    /// Segment start offset (milliseconds, per the field name).
    pub start_ms: u64,
    /// Segment duration (milliseconds, per the field name).
    pub duration_ms: u64,
    /// Text of the segment.
    pub text: String,
}

/// Result of a single Apple Speech transcription run.
///
/// Serialized with camelCase field names. A run that failed carries a message in
/// `error`; the benchmark treats `error == None` as success.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechTranscriptionResult {
    /// Payload discriminator; the non-macOS fallback uses `"transcription"`.
    pub kind: String,
    /// Version of the result schema; the non-macOS fallback uses `1`.
    pub schema_version: u32,
    /// Identifier of the module that produced the result.
    pub module_id: String,
    /// Locale used for the run.
    pub locale: String,
    /// Echo of the `ensure_assets` flag the run was requested with.
    pub ensure_assets: bool,
    /// OS version string; the non-macOS fallback stores `std::env::consts::OS` here.
    pub os_version: String,
    /// Whether the Apple Speech runtime is supported; `false` in the non-macOS fallback.
    pub runtime_supported: bool,
    /// Asset status string reported before the run.
    pub asset_status_before: String,
    /// Asset status string reported after the run.
    pub asset_status_after: String,
    /// Total elapsed time of the run (milliseconds, per the field name).
    pub total_elapsed_ms: u64,
    /// Elapsed time until the first result, if one was recorded.
    pub first_result_elapsed_ms: Option<u64>,
    /// Full transcript text.
    pub transcript: String,
    /// Number of words in the transcript as reported by the producer.
    pub word_count: usize,
    /// Transcript segments.
    pub segments: Vec<AppleSpeechTranscriptSegment>,
    /// Free-form notes attached to the result.
    pub notes: Vec<String>,
    /// Error message when the run failed; `None` on success.
    pub error: Option<String>,
}

/// Which Apple Speech module to use.
///
/// Serialized in kebab-case (`"speech"` / `"dictation"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "kebab-case")]
pub enum AppleSpeechMode {
    /// The speech transcriber module (backend id `apple-speech-transcriber`).
    Speech,
    /// The dictation transcriber module (backend id `apple-dictation-transcriber`).
    Dictation,
}

impl AppleSpeechMode {
    /// Returns the mode as a helper argument string: `"speech"` or `"dictation"`.
    ///
    /// Only compiled on macOS.
    #[cfg(target_os = "macos")]
    fn as_helper_arg(self) -> &'static str {
        match self {
            Self::Speech => "speech",
            Self::Dictation => "dictation",
        }
    }

    /// Returns the backend identifier recorded in benchmark results for this mode.
    fn backend_id(self) -> &'static str {
        match self {
            Self::Speech => "apple-speech-transcriber",
            Self::Dictation => "apple-dictation-transcriber",
        }
    }
}

/// One entry of the benchmark corpus file.
///
/// Deserialized from JSON with camelCase field names; every field other than `id`
/// and `audioPath` is optional and falls back to a default.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkCase {
    /// Identifier of the case, echoed in results and the summary.
    pub id: String,
    /// Path of the audio file; relative paths are resolved against the corpus
    /// file's directory when the corpus is loaded.
    pub audio_path: PathBuf,
    /// Content type of the audio; defaults to `ContentType::Meeting` when omitted.
    #[serde(default = "default_eval_content_type")]
    pub content_type: ContentType,
    /// Locale for the case; the benchmark falls back to `"en-US"` when absent.
    #[serde(default)]
    pub locale: Option<String>,
    /// Inline reference transcript; takes precedence over `reference_path` when
    /// it is not blank.
    #[serde(default)]
    pub reference_text: String,
    /// Path of a file containing the reference transcript; relative paths are
    /// resolved against the corpus file's directory.
    #[serde(default)]
    pub reference_path: Option<PathBuf>,
    /// Terms expected in the transcript (matched case-insensitively as substrings).
    #[serde(default)]
    pub required_terms: Vec<String>,
    /// Terms that should not appear in the transcript (matched case-insensitively
    /// as substrings).
    #[serde(default)]
    pub forbidden_terms: Vec<String>,
}

/// Benchmark outcome of one backend on one corpus case.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBackendBenchmark {
    /// Identifier of the backend (Apple backend id, or the Minutes engine name).
    pub backend_id: String,
    /// Outcome of the run: `"ok"`, `"error"` or `"unsupported"`.
    pub status: String,
    /// Elapsed time of the first (cold) run; only populated for Apple backends.
    pub cold_elapsed_ms: Option<u64>,
    /// Elapsed time of the second (warm) run; only populated for Apple backends.
    pub warm_elapsed_ms: Option<u64>,
    /// Elapsed time of the run the metrics were taken from.
    pub total_elapsed_ms: Option<u64>,
    /// Elapsed time until the first result, when the backend reports one.
    pub first_result_elapsed_ms: Option<u64>,
    /// Number of words in the transcript as reported by the backend.
    pub word_count: usize,
    /// Transcript text produced by the backend (empty on failure of a Minutes backend).
    pub transcript: String,
    /// Number of segments (Apple backends) or transcript lines (Minutes backends).
    pub segment_count: usize,
    /// Whether the output carries timing information.
    pub has_timestamps: bool,
    /// Word error rate against the reference, when a reference is available.
    pub wer: Option<f64>,
    /// Word error rate with punctuation ignored, when a reference is available.
    pub wer_punct_insensitive: Option<f64>,
    /// `wer` minus `wer_punct_insensitive`, when a reference is available.
    pub punctuation_wer_delta: Option<f64>,
    /// Required terms found in the transcript.
    pub required_terms_present: Vec<String>,
    /// Required terms not found in the transcript.
    pub required_terms_missing: Vec<String>,
    /// Forbidden terms found in the transcript.
    pub forbidden_terms_found: Vec<String>,
    /// Error message when the run failed; `None` on success.
    pub error: Option<String>,
}

/// Results of all four benchmarked backends for a single corpus case.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkCaseResult {
    /// Identifier of the corpus case.
    pub id: String,
    /// Audio file the case was run against.
    pub audio_path: PathBuf,
    /// Content type of the case.
    pub content_type: ContentType,
    /// Locale the case was run with (`"en-US"` when the case did not specify one).
    pub locale: String,
    /// Whether a reference transcript was available for WER computation.
    pub reference_available: bool,
    /// Result of the Apple speech transcriber.
    pub speech_transcriber: AppleSpeechBackendBenchmark,
    /// Result of the Apple dictation transcriber.
    pub dictation_transcriber: AppleSpeechBackendBenchmark,
    /// Result of the Whisper engine.
    pub whisper: AppleSpeechBackendBenchmark,
    /// Result of the Parakeet engine.
    pub parakeet: AppleSpeechBackendBenchmark,
}

/// Aggregate metrics of one backend over a set of cases.
///
/// Serialized with camelCase field names. Every average is taken over succeeded
/// cases that carry the respective value and is `None` when there are no samples.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechAggregateMetrics {
    /// Number of cases considered.
    pub cases_total: usize,
    /// Number of cases whose status is `"ok"`.
    pub cases_succeeded: usize,
    /// Number of succeeded cases that had a reference transcript.
    pub cases_with_reference: usize,
    /// Mean of `total_elapsed_ms`.
    pub average_elapsed_ms: Option<f64>,
    /// Mean of `first_result_elapsed_ms`.
    pub average_first_result_elapsed_ms: Option<f64>,
    /// Mean word error rate.
    pub average_wer: Option<f64>,
    /// Mean punctuation-insensitive word error rate.
    pub average_wer_punct_insensitive: Option<f64>,
}

/// Aggregate metrics for each of the four benchmarked backends.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkTotals {
    /// Aggregates for the Apple speech transcriber.
    pub speech_transcriber: AppleSpeechAggregateMetrics,
    /// Aggregates for the Apple dictation transcriber.
    pub dictation_transcriber: AppleSpeechAggregateMetrics,
    /// Aggregates for the Whisper engine.
    pub whisper: AppleSpeechAggregateMetrics,
    /// Aggregates for the Parakeet engine.
    pub parakeet: AppleSpeechAggregateMetrics,
}

/// Benchmark totals broken down by content type.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkSlices {
    /// Totals over every case.
    pub overall: AppleSpeechBenchmarkTotals,
    /// Totals over cases with `ContentType::Meeting`.
    pub meeting: AppleSpeechBenchmarkTotals,
    /// Totals over cases with `ContentType::Dictation`.
    pub dictation: AppleSpeechBenchmarkTotals,
    /// Totals over cases with `ContentType::Memo`.
    pub memo: AppleSpeechBenchmarkTotals,
}

/// Complete report of one benchmark run over a corpus.
///
/// Serialized with camelCase field names; written to `results.json` and rendered
/// to Markdown by `render_benchmark_summary`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkReport {
    /// RFC 3339 UTC timestamp taken when the report was assembled.
    pub generated_at: String,
    /// Path of the corpus file the run was based on.
    pub corpus_path: PathBuf,
    /// Transcription engine configured in Minutes at the time of the run.
    pub configured_engine: String,
    /// Capability snapshot probed at the start of the run.
    pub capabilities: AppleSpeechCapabilityReport,
    /// Per-case results, in corpus order.
    pub cases: Vec<AppleSpeechBenchmarkCaseResult>,
    /// Totals over every case.
    pub totals: AppleSpeechBenchmarkTotals,
    /// Totals broken down by content type.
    pub slices: AppleSpeechBenchmarkSlices,
    /// Methodology notes attached to the report.
    pub notes: Vec<String>,
}

/// Description of a benchmark invocation, persisted as `request.json` next to the results.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkRequest {
    /// NOTE(review): presumably the command line that triggered the run — confirm with the caller.
    pub command: String,
    /// Timestamp of the request (format chosen by the caller).
    pub generated_at: String,
    /// Path of the corpus file.
    pub corpus_path: PathBuf,
    /// Directory under which a timestamped run directory is created.
    pub output_root: PathBuf,
    /// Transcription engine configured in Minutes.
    pub configured_engine: String,
}

/// Locations of the artifacts written by `write_benchmark_artifacts`.
///
/// Serialized with camelCase field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct AppleSpeechBenchmarkArtifactPaths {
    /// Timestamped directory holding all artifacts of the run.
    pub run_dir: PathBuf,
    /// Path of `request.json` (the serialized request).
    pub request_json: PathBuf,
    /// Path of `results.json` (the serialized report).
    pub results_json: PathBuf,
    /// Path of `summary.md` (the rendered Markdown summary).
    pub summary_md: PathBuf,
}

/// Returns the default root directory for Apple Speech research artifacts:
/// `research/apple-speech` inside the Minutes directory.
pub fn default_research_root() -> PathBuf {
    let research_dir = Config::minutes_dir().join("research");
    research_dir.join("apple-speech")
}

/// Converts an optional language tag into a locale hint.
///
/// The tag is trimmed and every `-` is replaced by `_` (e.g. `en-US` becomes
/// `en_US`). Returns `None` when no language is given or it is blank.
pub fn live_locale_hint(language: Option<&str>) -> Option<String> {
    language
        .map(str::trim)
        .filter(|tag| !tag.is_empty())
        .map(|tag| tag.replace('-', "_"))
}

/// Probes the Apple Speech capabilities of the current machine.
///
/// On macOS the helper is installed (if necessary) and asked for its capability
/// report. On every other platform a synthetic report is returned in which both
/// modules are marked `"unsupported"`.
///
/// # Errors
///
/// On macOS, propagates any failure from installing or running the helper.
pub fn probe_capabilities() -> Result<AppleSpeechCapabilityReport> {
    #[cfg(target_os = "macos")]
    {
        let helper_path = ensure_helper_installed()?;
        run_helper_capabilities(&helper_path)
    }

    #[cfg(not(target_os = "macos"))]
    {
        // Both modules share the same "unsupported" shape; only the id differs.
        let unsupported_module = |module_id: &str| AppleSpeechModuleCapability {
            module_id: module_id.into(),
            is_available: None,
            asset_status: "unsupported".into(),
            supported_locales: Vec::new(),
            installed_locales: Vec::new(),
        };

        let fallback = AppleSpeechCapabilityReport {
            kind: "capabilities".into(),
            schema_version: 1,
            os_version: std::env::consts::OS.into(),
            runtime_supported: false,
            read_only: true,
            speech_transcriber: unsupported_module("speech-transcriber"),
            dictation_transcriber: unsupported_module("dictation-transcriber"),
            notes: vec!["Apple Speech evaluation is only available on macOS.".into()],
        };
        Ok(fallback)
    }
}

/// Transcribes `audio_path` with the Apple Speech helper (macOS implementation).
///
/// Ensures the helper is installed and then runs it with the given locale, mode
/// and `ensure_assets` flag.
///
/// # Errors
///
/// Propagates any failure from installing or running the helper.
#[cfg(target_os = "macos")]
pub fn transcribe_with_apple_speech(
    audio_path: &Path,
    locale: Option<&str>,
    mode: AppleSpeechMode,
    ensure_assets: bool,
) -> Result<AppleSpeechTranscriptionResult> {
    let helper_path = ensure_helper_installed()?;
    let transcription =
        run_helper_transcription(&helper_path, audio_path, locale, mode, ensure_assets);
    transcription
}

/// Fallback for platforms other than macOS.
///
/// No transcription is attempted: the returned result is empty, marks the runtime
/// as unsupported and carries the error `"unsupported platform"`. The locale
/// defaults to `"en-US"` when none is given.
#[cfg(not(target_os = "macos"))]
pub fn transcribe_with_apple_speech(
    _audio_path: &Path,
    locale: Option<&str>,
    mode: AppleSpeechMode,
    ensure_assets: bool,
) -> Result<AppleSpeechTranscriptionResult> {
    let resolved_locale = locale.unwrap_or("en-US");
    let unsupported_status = || String::from("unsupported");

    let placeholder = AppleSpeechTranscriptionResult {
        kind: String::from("transcription"),
        schema_version: 1,
        module_id: String::from(mode.backend_id()),
        locale: String::from(resolved_locale),
        ensure_assets,
        os_version: String::from(std::env::consts::OS),
        runtime_supported: false,
        asset_status_before: unsupported_status(),
        asset_status_after: unsupported_status(),
        total_elapsed_ms: 0,
        first_result_elapsed_ms: None,
        transcript: String::new(),
        word_count: 0,
        segments: Vec::new(),
        notes: vec![String::from(
            "Apple Speech evaluation is only available on macOS.",
        )],
        error: Some(String::from("unsupported platform")),
    };
    Ok(placeholder)
}

pub fn run_benchmark_corpus(
    corpus_path: &Path,
    config: &Config,
) -> Result<AppleSpeechBenchmarkReport> {
    let cases = load_benchmark_cases(corpus_path)?;
    if cases.is_empty() {
        return Err(MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "apple-speech benchmark corpus is empty".to_string(),
        )));
    }

    let capabilities = probe_capabilities()?;
    let mut results = Vec::new();
    for case in &cases {
        results.push(run_benchmark_case(case, config)?);
    }
    let notes = vec![
        "Whisper is always benchmarked explicitly as the cross-platform baseline.".into(),
        "Parakeet is benchmarked explicitly when compiled/configured; failures are recorded instead of skipped.".into(),
        "Apple timings are measured by invoking the helper twice per mode and recording the second run as warm.".into(),
        "Current Minutes dictation is a streaming UI path; this benchmark uses file-based backends as a comparable proxy, not a live hotkey benchmark.".into(),
    ];

    Ok(AppleSpeechBenchmarkReport {
        generated_at: Utc::now().to_rfc3339(),
        corpus_path: corpus_path.to_path_buf(),
        configured_engine: config.transcription.engine.clone(),
        capabilities,
        totals: totals_for_cases(&results),
        slices: AppleSpeechBenchmarkSlices {
            overall: totals_for_cases(&results),
            meeting: totals_for_cases(
                &results
                    .iter()
                    .filter(|case| case.content_type == ContentType::Meeting)
                    .cloned()
                    .collect::<Vec<_>>(),
            ),
            dictation: totals_for_cases(
                &results
                    .iter()
                    .filter(|case| case.content_type == ContentType::Dictation)
                    .cloned()
                    .collect::<Vec<_>>(),
            ),
            memo: totals_for_cases(
                &results
                    .iter()
                    .filter(|case| case.content_type == ContentType::Memo)
                    .cloned()
                    .collect::<Vec<_>>(),
            ),
        },
        cases: results,
        notes,
    })
}

/// Reads the corpus file and parses it as a JSON array of benchmark cases.
///
/// Relative audio/reference paths are resolved against the corpus file's directory.
///
/// # Errors
///
/// Returns an I/O error when the file cannot be read, and an `InvalidData` I/O
/// error carrying the parser message when the JSON is malformed.
fn load_benchmark_cases(corpus_path: &Path) -> Result<Vec<AppleSpeechBenchmarkCase>> {
    let corpus_json = fs::read_to_string(corpus_path)?;
    let mut parsed = match serde_json::from_str::<Vec<AppleSpeechBenchmarkCase>>(&corpus_json) {
        Ok(parsed) => parsed,
        Err(parse_error) => {
            let invalid_data = std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                parse_error.to_string(),
            );
            return Err(MinutesError::Io(invalid_data));
        }
    };
    normalize_benchmark_case_paths(&mut parsed, corpus_path);
    Ok(parsed)
}

/// Rewrites the audio and reference paths of every case so that relative paths
/// are anchored at the directory containing the corpus file.
///
/// Does nothing when `corpus_path` has no parent directory.
fn normalize_benchmark_case_paths(cases: &mut [AppleSpeechBenchmarkCase], corpus_path: &Path) {
    if let Some(corpus_dir) = corpus_path.parent() {
        for case in cases.iter_mut() {
            case.audio_path = resolve_corpus_relative_path(corpus_dir, &case.audio_path);
            case.reference_path = case
                .reference_path
                .take()
                .map(|reference| resolve_corpus_relative_path(corpus_dir, &reference));
        }
    }
}

/// Resolves `path` against `base_dir`: relative paths are joined onto the base
/// directory, absolute paths are returned unchanged.
fn resolve_corpus_relative_path(base_dir: &Path, path: &Path) -> PathBuf {
    if path.is_relative() {
        return base_dir.join(path);
    }
    path.to_path_buf()
}

pub fn write_benchmark_artifacts(
    request: &AppleSpeechBenchmarkRequest,
    report: &AppleSpeechBenchmarkReport,
) -> Result<AppleSpeechBenchmarkArtifactPaths> {
    let run_dir = request
        .output_root
        .join(Utc::now().format("%Y-%m-%dT%H-%M-%SZ").to_string());
    fs::create_dir_all(&run_dir)?;

    let request_json = run_dir.join("request.json");
    let results_json = run_dir.join("results.json");
    let summary_md = run_dir.join("summary.md");

    fs::write(
        &request_json,
        serde_json::to_string_pretty(request).map_err(|error| {
            MinutesError::Io(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                error.to_string(),
            ))
        })?,
    )?;
    fs::write(
        &results_json,
        serde_json::to_string_pretty(report).map_err(|error| {
            MinutesError::Io(std::io::Error::new(
                std::io::ErrorKind::InvalidData,
                error.to_string(),
            ))
        })?,
    )?;
    fs::write(&summary_md, render_benchmark_summary(report))?;

    Ok(AppleSpeechBenchmarkArtifactPaths {
        run_dir,
        request_json,
        results_json,
        summary_md,
    })
}

pub fn render_benchmark_summary(report: &AppleSpeechBenchmarkReport) -> String {
    let mut lines = Vec::new();
    lines.push("# Apple Speech Benchmark Summary".to_string());
    lines.push(String::new());
    lines.push(format!("- Generated at: `{}`", report.generated_at));
    lines.push(format!("- Corpus: `{}`", report.corpus_path.display()));
    lines.push(format!(
        "- Configured Minutes engine during run: `{}`",
        report.configured_engine
    ));
    lines.push(String::new());
    lines.push("## Capability snapshot".to_string());
    lines.push(String::new());
    lines.push(format!(
        "- SpeechTranscriber available: `{}`",
        report
            .capabilities
            .speech_transcriber
            .is_available
            .map(|value| value.to_string())
            .unwrap_or_else(|| "n/a".into())
    ));
    lines.push(format!(
        "- SpeechTranscriber asset status: `{}`",
        report.capabilities.speech_transcriber.asset_status
    ));
    lines.push(format!(
        "- DictationTranscriber asset status: `{}`",
        report.capabilities.dictation_transcriber.asset_status
    ));
    if !report.capabilities.notes.is_empty() {
        lines.push(format!(
            "- Capability notes: {}",
            report.capabilities.notes.join(" | ")
        ));
    }
    lines.push(String::new());
    lines.push("## Overall metrics".to_string());
    lines.push(String::new());
    for (label, metrics) in [
        ("SpeechTranscriber", &report.totals.speech_transcriber),
        ("DictationTranscriber", &report.totals.dictation_transcriber),
        ("Whisper", &report.totals.whisper),
        ("Parakeet", &report.totals.parakeet),
    ] {
        lines.push(format!(
            "- {}: succeeded `{}/{}`; avg elapsed `{}` ms; avg first-result `{}` ms; avg WER `{}`; avg WER (punct-insensitive) `{}`",
            label,
            metrics.cases_succeeded,
            metrics.cases_total,
            format_optional_f64(metrics.average_elapsed_ms),
            format_optional_f64(metrics.average_first_result_elapsed_ms),
            format_optional_f64(metrics.average_wer.map(|value| value * 100.0)),
            format_optional_f64(
                metrics
                    .average_wer_punct_insensitive
                    .map(|value| value * 100.0)
            ),
        ));
    }
    for (slice_label, totals) in [
        ("meeting", &report.slices.meeting),
        ("dictation", &report.slices.dictation),
        ("memo", &report.slices.memo),
    ] {
        lines.push(String::new());
        lines.push(format!("## {} metrics", slice_label));
        lines.push(String::new());
        for (label, metrics) in [
            ("SpeechTranscriber", &totals.speech_transcriber),
            ("DictationTranscriber", &totals.dictation_transcriber),
            ("Whisper", &totals.whisper),
            ("Parakeet", &totals.parakeet),
        ] {
            lines.push(format!(
                "- {}: succeeded `{}/{}`; avg elapsed `{}` ms; avg first-result `{}` ms; avg WER `{}`; avg WER (punct-insensitive) `{}`",
                label,
                metrics.cases_succeeded,
                metrics.cases_total,
                format_optional_f64(metrics.average_elapsed_ms),
                format_optional_f64(metrics.average_first_result_elapsed_ms),
                format_optional_f64(metrics.average_wer.map(|value| value * 100.0)),
                format_optional_f64(
                    metrics
                        .average_wer_punct_insensitive
                        .map(|value| value * 100.0)
                ),
            ));
        }
    }
    lines.push(String::new());
    lines.push("## Cases".to_string());
    lines.push(String::new());
    for case in &report.cases {
        lines.push(format!(
            "- `{}` [{} {}]",
            case.id,
            content_type_label(case.content_type),
            case.locale
        ));
        lines.push(format!(
            "  speech: {} / {} ms / WER {} / WER no-punct {}{}",
            case.speech_transcriber.status,
            case.speech_transcriber
                .total_elapsed_ms
                .map(|value| value.to_string())
                .unwrap_or_else(|| "n/a".into()),
            format_optional_f64(case.speech_transcriber.wer.map(|value| value * 100.0)),
            format_optional_f64(
                case.speech_transcriber
                    .wer_punct_insensitive
                    .map(|value| value * 100.0)
            ),
            term_quality_suffix(&case.speech_transcriber),
        ));
        lines.push(format!(
            "  dictation: {} / {} ms / WER {} / WER no-punct {}{}",
            case.dictation_transcriber.status,
            case.dictation_transcriber
                .total_elapsed_ms
                .map(|value| value.to_string())
                .unwrap_or_else(|| "n/a".into()),
            format_optional_f64(case.dictation_transcriber.wer.map(|value| value * 100.0)),
            format_optional_f64(
                case.dictation_transcriber
                    .wer_punct_insensitive
                    .map(|value| value * 100.0)
            ),
            term_quality_suffix(&case.dictation_transcriber),
        ));
        lines.push(format!(
            "  whisper: {} / {} ms / WER {} / WER no-punct {}{}",
            case.whisper.status,
            case.whisper
                .total_elapsed_ms
                .map(|value| value.to_string())
                .unwrap_or_else(|| "n/a".into()),
            format_optional_f64(case.whisper.wer.map(|value| value * 100.0)),
            format_optional_f64(
                case.whisper
                    .wer_punct_insensitive
                    .map(|value| value * 100.0)
            ),
            term_quality_suffix(&case.whisper),
        ));
        lines.push(format!(
            "  parakeet: {} / {} ms / WER {} / WER no-punct {}{}",
            case.parakeet.status,
            case.parakeet
                .total_elapsed_ms
                .map(|value| value.to_string())
                .unwrap_or_else(|| "n/a".into()),
            format_optional_f64(case.parakeet.wer.map(|value| value * 100.0)),
            format_optional_f64(
                case.parakeet
                    .wer_punct_insensitive
                    .map(|value| value * 100.0)
            ),
            term_quality_suffix(&case.parakeet),
        ));
    }
    lines.push(String::new());
    lines.push("## Notes".to_string());
    lines.push(String::new());
    for note in &report.notes {
        lines.push(format!("- {}", note));
    }
    lines.join("\n")
}

/// Serde default for `AppleSpeechBenchmarkCase::content_type`: cases that omit the
/// field are treated as meetings.
fn default_eval_content_type() -> ContentType {
    ContentType::Meeting
}

/// Builds the term-quality suffix appended to a per-case summary line.
///
/// Lists missing required terms and found forbidden terms, each section prefixed
/// with `"; "`. Returns an empty string when there is nothing to report.
fn term_quality_suffix(result: &AppleSpeechBackendBenchmark) -> String {
    let missing = &result.required_terms_missing;
    let forbidden = &result.forbidden_terms_found;

    let mut suffix = String::new();
    if !missing.is_empty() {
        suffix.push_str("; missing required: ");
        suffix.push_str(&missing.join(", "));
    }
    if !forbidden.is_empty() {
        suffix.push_str("; forbidden: ");
        suffix.push_str(&forbidden.join(", "));
    }
    suffix
}

/// Aggregates the results of one backend (picked by `select`) over `cases`.
///
/// Only cases whose status is `"ok"` contribute to the success count, the
/// reference count and the averages. Each average is taken over the succeeded
/// cases that carry the respective value and is `None` without samples.
fn aggregate_metrics(
    cases: &[AppleSpeechBenchmarkCaseResult],
    select: impl Fn(&AppleSpeechBenchmarkCaseResult) -> &AppleSpeechBackendBenchmark,
) -> AppleSpeechAggregateMetrics {
    /// Running sum and sample count for one averaged metric.
    #[derive(Default)]
    struct Tally {
        sum: f64,
        samples: usize,
    }

    impl Tally {
        fn record(&mut self, sample: f64) {
            self.sum += sample;
            self.samples += 1;
        }

        fn mean(&self) -> Option<f64> {
            average(self.sum, self.samples)
        }
    }

    let mut succeeded = 0usize;
    let mut with_reference = 0usize;
    let mut elapsed = Tally::default();
    let mut first_result = Tally::default();
    let mut wer = Tally::default();
    let mut wer_punct_insensitive = Tally::default();

    for case in cases {
        let backend = select(case);
        if backend.status != "ok" {
            continue;
        }

        succeeded += 1;
        if case.reference_available {
            with_reference += 1;
        }
        if let Some(millis) = backend.total_elapsed_ms {
            elapsed.record(millis as f64);
        }
        if let Some(millis) = backend.first_result_elapsed_ms {
            first_result.record(millis as f64);
        }
        if let Some(rate) = backend.wer {
            wer.record(rate);
        }
        if let Some(rate) = backend.wer_punct_insensitive {
            wer_punct_insensitive.record(rate);
        }
    }

    AppleSpeechAggregateMetrics {
        cases_total: cases.len(),
        cases_succeeded: succeeded,
        cases_with_reference: with_reference,
        average_elapsed_ms: elapsed.mean(),
        average_first_result_elapsed_ms: first_result.mean(),
        average_wer: wer.mean(),
        average_wer_punct_insensitive: wer_punct_insensitive.mean(),
    }
}

/// Computes the aggregate metrics of all four backends over `cases`.
fn totals_for_cases(cases: &[AppleSpeechBenchmarkCaseResult]) -> AppleSpeechBenchmarkTotals {
    let speech_transcriber = aggregate_metrics(cases, |result| &result.speech_transcriber);
    let dictation_transcriber = aggregate_metrics(cases, |result| &result.dictation_transcriber);
    let whisper = aggregate_metrics(cases, |result| &result.whisper);
    let parakeet = aggregate_metrics(cases, |result| &result.parakeet);

    AppleSpeechBenchmarkTotals {
        speech_transcriber,
        dictation_transcriber,
        whisper,
        parakeet,
    }
}

/// Returns `sum / count`, or `None` when there are no samples.
fn average(sum: f64, count: usize) -> Option<f64> {
    (count != 0).then(|| sum / count as f64)
}

/// Formats a value with two decimals, or `"n/a"` when it is absent.
fn format_optional_f64(value: Option<f64>) -> String {
    match value {
        Some(number) => format!("{number:.2}"),
        None => "n/a".into(),
    }
}

/// Runs a single corpus case against all four backends.
///
/// The locale defaults to `"en-US"`. Backends are run in the order speech
/// transcriber, dictation transcriber, Whisper, Parakeet.
///
/// # Errors
///
/// Fails when the reference text cannot be loaded or when an Apple Speech run
/// returns an error. Whisper/Parakeet failures are recorded in their result
/// instead of being propagated.
fn run_benchmark_case(
    case: &AppleSpeechBenchmarkCase,
    config: &Config,
) -> Result<AppleSpeechBenchmarkCaseResult> {
    let locale = match &case.locale {
        Some(explicit) => explicit.clone(),
        None => "en-US".into(),
    };
    let reference_text = load_reference_text(case)?;
    let reference = reference_text.as_deref();

    let speech_transcriber =
        benchmark_apple_mode(case, &locale, AppleSpeechMode::Speech, reference)?;
    let dictation_transcriber =
        benchmark_apple_mode(case, &locale, AppleSpeechMode::Dictation, reference)?;
    let whisper = benchmark_minutes_backend(case, &locale, "whisper", config, reference);
    let parakeet = benchmark_minutes_backend(case, &locale, "parakeet", config, reference);

    let reference_available = reference.is_some();
    Ok(AppleSpeechBenchmarkCaseResult {
        id: case.id.clone(),
        audio_path: case.audio_path.clone(),
        content_type: case.content_type,
        locale,
        reference_available,
        speech_transcriber,
        dictation_transcriber,
        whisper,
        parakeet,
    })
}

/// Benchmarks one Apple Speech mode on a case.
///
/// The helper is invoked twice (cold, then warm), both with `ensure_assets`
/// enabled. Metrics are taken from the warm run unless it reported an error, in
/// which case the cold run is used. The status is `"ok"` without an error,
/// `"error"` when the runtime is supported but the run failed, and
/// `"unsupported"` otherwise.
///
/// # Errors
///
/// Propagates failures of either transcription call.
fn benchmark_apple_mode(
    case: &AppleSpeechBenchmarkCase,
    locale: &str,
    mode: AppleSpeechMode,
    reference_text: Option<&str>,
) -> Result<AppleSpeechBackendBenchmark> {
    let cold = transcribe_with_apple_speech(&case.audio_path, Some(locale), mode, true)?;
    let warm = transcribe_with_apple_speech(&case.audio_path, Some(locale), mode, true)?;

    // Prefer the warm run; fall back to the cold one only if the warm run failed.
    let selected = if warm.error.is_some() { &cold } else { &warm };
    let transcript = selected.transcript.as_str();

    let status = match (&selected.error, selected.runtime_supported) {
        (None, _) => "ok",
        (Some(_), true) => "error",
        (Some(_), false) => "unsupported",
    };
    let has_timestamps = selected
        .segments
        .iter()
        .any(|segment| segment.start_ms != 0 || segment.duration_ms != 0);
    let wer = reference_text.map(|reference| word_error_rate(reference, transcript));
    let wer_punct_insensitive = reference_text
        .map(|reference| word_error_rate_punct_insensitive(reference, transcript));

    Ok(AppleSpeechBackendBenchmark {
        backend_id: mode.backend_id().into(),
        status: status.into(),
        cold_elapsed_ms: Some(cold.total_elapsed_ms),
        warm_elapsed_ms: Some(warm.total_elapsed_ms),
        total_elapsed_ms: Some(selected.total_elapsed_ms),
        first_result_elapsed_ms: selected.first_result_elapsed_ms,
        word_count: selected.word_count,
        transcript: transcript.to_string(),
        segment_count: selected.segments.len(),
        has_timestamps,
        wer,
        wer_punct_insensitive,
        punctuation_wer_delta: punctuation_wer_delta(reference_text, transcript),
        required_terms_present: present_terms(transcript, &case.required_terms),
        required_terms_missing: missing_terms(transcript, &case.required_terms),
        forbidden_terms_found: present_terms(transcript, &case.forbidden_terms),
        error: selected.error.clone(),
    })
}

/// Benchmarks one of the Minutes transcription engines on a case.
///
/// A copy of `config` is switched to `engine` and to the language derived from
/// `locale`. Meeting cases go through `transcribe_meeting`; every other content
/// type goes through `transcribe`. Failures never propagate: they are recorded
/// with status `"error"`, or `"unsupported"` when Parakeet is unavailable or failed.
fn benchmark_minutes_backend(
    case: &AppleSpeechBenchmarkCase,
    locale: &str,
    engine: &str,
    config: &Config,
    reference_text: Option<&str>,
) -> AppleSpeechBackendBenchmark {
    let mut backend_config = config.clone();
    backend_config.transcription.engine = engine.into();
    backend_config.transcription.language = locale_language_hint(locale);

    let started = Instant::now();
    let outcome = if matches!(case.content_type, ContentType::Meeting) {
        transcribe::transcribe_meeting(&case.audio_path, &backend_config)
    } else {
        transcribe::transcribe(&case.audio_path, &backend_config)
    };

    let transcription = match outcome {
        Ok(transcription) => transcription,
        Err(error) => {
            let parakeet_unavailable = engine == "parakeet"
                && matches!(
                    &error,
                    TranscribeError::EngineNotAvailable(_) | TranscribeError::ParakeetFailed(_)
                );
            let status = if parakeet_unavailable {
                "unsupported"
            } else {
                "error"
            };
            // Nothing was transcribed, so every required term counts as missing.
            return AppleSpeechBackendBenchmark {
                backend_id: engine.into(),
                status: status.into(),
                cold_elapsed_ms: None,
                warm_elapsed_ms: None,
                total_elapsed_ms: None,
                first_result_elapsed_ms: None,
                word_count: 0,
                transcript: String::new(),
                segment_count: 0,
                has_timestamps: false,
                wer: None,
                wer_punct_insensitive: None,
                punctuation_wer_delta: None,
                required_terms_present: vec![],
                required_terms_missing: case.required_terms.clone(),
                forbidden_terms_found: vec![],
                error: Some(error.to_string()),
            };
        }
    };

    let elapsed_ms = started.elapsed().as_millis() as u64;
    let text = &transcription.text;

    AppleSpeechBackendBenchmark {
        backend_id: engine.into(),
        status: "ok".into(),
        cold_elapsed_ms: None,
        warm_elapsed_ms: None,
        total_elapsed_ms: Some(elapsed_ms),
        first_result_elapsed_ms: None,
        word_count: transcription.stats.final_words,
        transcript: text.clone(),
        segment_count: text.lines().count(),
        // A line starting with '[' is taken as carrying a timestamp prefix.
        has_timestamps: text.lines().any(|line| line.starts_with('[')),
        wer: reference_text.map(|reference| word_error_rate(reference, text)),
        wer_punct_insensitive: reference_text
            .map(|reference| word_error_rate_punct_insensitive(reference, text)),
        punctuation_wer_delta: punctuation_wer_delta(reference_text, text),
        required_terms_present: present_terms(text, &case.required_terms),
        required_terms_missing: missing_terms(text, &case.required_terms),
        forbidden_terms_found: present_terms(text, &case.forbidden_terms),
        error: None,
    }
}

/// Loads the reference transcript of a case, if it has one.
///
/// A non-blank inline `reference_text` wins; otherwise the file at
/// `reference_path` is read. Returns `Ok(None)` when neither is present.
///
/// # Errors
///
/// Returns an I/O error when the reference file cannot be read.
fn load_reference_text(case: &AppleSpeechBenchmarkCase) -> Result<Option<String>> {
    let has_inline_reference = !case.reference_text.trim().is_empty();
    if has_inline_reference {
        return Ok(Some(case.reference_text.clone()));
    }
    match case.reference_path.as_ref() {
        Some(path) => {
            let contents = fs::read_to_string(path)?;
            Ok(Some(contents))
        }
        None => Ok(None),
    }
}

fn eval_text_for_compare(text: &str) -> String {
    text.lines()
        .filter_map(clean_transcript_line)
        .map(|line| normalize_space(&line).to_lowercase())
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join(" ")
}

/// Normalizes a transcript for punctuation-insensitive comparison.
///
/// Starts from `eval_text_for_compare`, replaces every character that is neither
/// alphanumeric nor whitespace with a space, and collapses whitespace runs into
/// single spaces.
fn eval_text_for_compare_punct_insensitive(text: &str) -> String {
    let normalized = eval_text_for_compare(text);

    let mut without_punctuation = String::with_capacity(normalized.len());
    for ch in normalized.chars() {
        let keep = ch.is_alphanumeric() || ch.is_whitespace();
        without_punctuation.push(if keep { ch } else { ' ' });
    }

    let words: Vec<&str> = without_punctuation.split_whitespace().collect();
    words.join(" ")
}

/// Word error rate of `hypothesis` against `reference`.
///
/// Both texts are normalized with `eval_text_for_compare` and split on
/// whitespace. The rate is the word-level edit distance (insertions, deletions
/// and substitutions each cost 1) divided by the number of reference words, so
/// it can exceed `1.0`. An empty reference yields `0.0` for an empty hypothesis
/// and `1.0` otherwise.
fn word_error_rate(reference: &str, hypothesis: &str) -> f64 {
    let reference = eval_text_for_compare(reference);
    let hypothesis = eval_text_for_compare(hypothesis);
    let reference_words: Vec<&str> = reference.split_whitespace().collect();
    let hypothesis_words: Vec<&str> = hypothesis.split_whitespace().collect();
    edit_distance_word_error_rate(&reference_words, &hypothesis_words)
}

/// Word error rate of `hypothesis` against `reference`, ignoring punctuation.
///
/// Identical to `word_error_rate` except that both texts are normalized with
/// `eval_text_for_compare_punct_insensitive` first.
fn word_error_rate_punct_insensitive(reference: &str, hypothesis: &str) -> f64 {
    let reference = eval_text_for_compare_punct_insensitive(reference);
    let hypothesis = eval_text_for_compare_punct_insensitive(hypothesis);
    let reference_words: Vec<&str> = reference.split_whitespace().collect();
    let hypothesis_words: Vec<&str> = hypothesis.split_whitespace().collect();
    edit_distance_word_error_rate(&reference_words, &hypothesis_words)
}

/// Shared core of both WER variants: word-level Levenshtein distance divided by
/// the reference length.
///
/// Only two rows of the dynamic-programming table are kept, so memory use is
/// linear in the hypothesis length instead of quadratic; this matters for
/// meeting-length transcripts with thousands of words.
fn edit_distance_word_error_rate(reference_words: &[&str], hypothesis_words: &[&str]) -> f64 {
    if reference_words.is_empty() {
        return if hypothesis_words.is_empty() {
            0.0
        } else {
            1.0
        };
    }

    // `previous[j]` is the distance between the reference prefix processed so far
    // and the first `j` hypothesis words; initially the reference prefix is empty.
    let mut previous: Vec<usize> = (0..=hypothesis_words.len()).collect();
    let mut current = vec![0usize; hypothesis_words.len() + 1];

    for (i, reference_word) in reference_words.iter().enumerate() {
        // Against an empty hypothesis, all `i + 1` reference words are deletions.
        current[0] = i + 1;
        for (j, hypothesis_word) in hypothesis_words.iter().enumerate() {
            let substitution = previous[j] + usize::from(reference_word != hypothesis_word);
            let deletion = previous[j + 1] + 1;
            let insertion = current[j] + 1;
            current[j + 1] = deletion.min(insertion).min(substitution);
        }
        std::mem::swap(&mut previous, &mut current);
    }

    // After the final swap, `previous` holds the row for the full reference.
    previous[hypothesis_words.len()] as f64 / reference_words.len() as f64
}

/// How much of the word error rate is attributable to punctuation.
///
/// Returns the punctuation-sensitive WER minus the punctuation-insensitive
/// WER, or `None` when no reference text is available.
fn punctuation_wer_delta(reference_text: Option<&str>, hypothesis: &str) -> Option<f64> {
    let reference = reference_text?;
    let strict = word_error_rate(reference, hypothesis);
    let relaxed = word_error_rate_punct_insensitive(reference, hypothesis);
    Some(strict - relaxed)
}

/// Returns the entries of `terms` that occur in `text`, in their original
/// order and spelling.
///
/// Matching is a case-insensitive substring test, so a term also matches when
/// it appears inside a longer word.
fn present_terms(text: &str, terms: &[String]) -> Vec<String> {
    let haystack = text.to_lowercase();
    let mut found = Vec::new();
    for candidate in terms {
        let needle = candidate.to_lowercase();
        if haystack.contains(needle.as_str()) {
            found.push(candidate.clone());
        }
    }
    found
}

/// Returns the entries of `terms` that do not occur in `text`, in their
/// original order and spelling.
///
/// Matching is a case-insensitive substring test; a term counts as present
/// even when it only appears inside a longer word.
fn missing_terms(text: &str, terms: &[String]) -> Vec<String> {
    let haystack = text.to_lowercase();
    terms
        .iter()
        .filter_map(|candidate| {
            let is_present = haystack.contains(candidate.to_lowercase().as_str());
            (!is_present).then(|| candidate.clone())
        })
        .collect()
}

/// Runs `<helper> capabilities` and parses its stdout as an
/// [`AppleSpeechCapabilityReport`].
///
/// # Errors
///
/// All failures are reported as `MinutesError::Io`:
/// - `TimedOut` when `output_with_timeout` yields no output within
///   `HELPER_TIMEOUT`;
/// - an "other" error carrying the helper's stderr when it exits unsuccessfully;
/// - `InvalidData` when stdout is not valid JSON for the report.
#[cfg(target_os = "macos")]
fn run_helper_capabilities(helper: &Path) -> Result<AppleSpeechCapabilityReport> {
    let mut command = Command::new(helper);
    command.arg("capabilities");

    let Some(output) = output_with_timeout(command, HELPER_TIMEOUT) else {
        return Err(MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::TimedOut,
            "apple speech helper capabilities timed out",
        )));
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(MinutesError::Io(std::io::Error::other(format!(
            "apple speech helper capabilities failed: {}",
            stderr
        ))));
    }

    match serde_json::from_slice::<AppleSpeechCapabilityReport>(&output.stdout) {
        Ok(report) => Ok(report),
        Err(error) => Err(MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            error.to_string(),
        ))),
    }
}

/// Runs `<helper> transcribe` on `audio_path` and parses its stdout as an
/// [`AppleSpeechTranscriptionResult`].
///
/// `--locale` is passed only when `locale` is `Some`, and `--ensure-assets`
/// only when `ensure_assets` is true.
///
/// A result whose `error` field is set is returned as `Ok` even when the
/// helper exited unsuccessfully, so callers can inspect the structured error.
///
/// # Errors
///
/// All failures are reported as `MinutesError::Io`:
/// - `InvalidInput` when `audio_path` is not valid UTF-8;
/// - `TimedOut` when `output_with_timeout` yields no output within
///   `HELPER_TRANSCRIBE_TIMEOUT`;
/// - an "other" error carrying the helper's stderr when it exits
///   unsuccessfully and stdout holds no structured error (including the case
///   where stdout is not parseable at all);
/// - `InvalidData` when the helper exits successfully but stdout is not valid
///   JSON for the result.
#[cfg(target_os = "macos")]
fn run_helper_transcription(
    helper: &Path,
    audio_path: &Path,
    locale: Option<&str>,
    mode: AppleSpeechMode,
    ensure_assets: bool,
) -> Result<AppleSpeechTranscriptionResult> {
    let audio_arg = audio_path.to_str().ok_or_else(|| {
        MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "audio path is not valid UTF-8",
        ))
    })?;

    let mut command = Command::new(helper);
    command
        .arg("transcribe")
        .args(["--mode", mode.as_helper_arg()])
        .args(["--audio-path", audio_arg]);
    if let Some(locale) = locale {
        command.args(["--locale", locale]);
    }
    if ensure_assets {
        command.arg("--ensure-assets");
    }

    let output = output_with_timeout(command, HELPER_TRANSCRIBE_TIMEOUT).ok_or_else(|| {
        MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::TimedOut,
            "apple speech helper transcription timed out",
        ))
    })?;

    let helper_failure = || {
        MinutesError::Io(std::io::Error::other(format!(
            "apple speech helper failed: {}",
            String::from_utf8_lossy(&output.stderr)
        )))
    };

    match serde_json::from_slice::<AppleSpeechTranscriptionResult>(&output.stdout) {
        Ok(parsed) if output.status.success() || parsed.error.is_some() => Ok(parsed),
        Ok(_) => Err(helper_failure()),
        // A helper that died without printing JSON would otherwise surface only
        // as an opaque parse error; its stderr is the useful diagnostic.
        Err(_) if !output.status.success() => Err(helper_failure()),
        Err(error) => Err(MinutesError::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            error.to_string(),
        ))),
    }
}

/// Makes sure the compiled Apple speech helper exists under the minutes
/// directory and returns the path to the binary.
///
/// The embedded `HELPER_SOURCE` is written to `lib/apple-speech-helper.swift`
/// whenever the file is unreadable or its contents differ. The binary at
/// `bin/apple-speech-helper` is (re)compiled when it is missing, when either
/// file's metadata cannot be read, or when the source's modification time is
/// newer than the binary's.
///
/// # Errors
///
/// Propagates failures from creating the directories, writing the source
/// file, and `compile_helper`.
#[cfg(target_os = "macos")]
fn ensure_helper_installed() -> Result<PathBuf> {
    let bin_path = Config::minutes_dir()
        .join("bin")
        .join("apple-speech-helper");
    let source_path = Config::minutes_dir()
        .join("lib")
        .join("apple-speech-helper.swift");

    for path in [&source_path, &bin_path] {
        if let Some(parent) = path.parent() {
            fs::create_dir_all(parent)?;
        }
    }

    // An unreadable source file is treated the same as an outdated one.
    let source_is_current = fs::read_to_string(&source_path)
        .map(|existing| existing == HELPER_SOURCE)
        .unwrap_or(false);
    if !source_is_current {
        fs::write(&source_path, HELPER_SOURCE)?;
    }

    let binary_is_stale = match (fs::metadata(&source_path), fs::metadata(&bin_path)) {
        (Ok(source_meta), Ok(bin_meta)) => {
            let source_mtime = source_meta.modified().ok();
            let binary_mtime = bin_meta.modified().ok();
            source_mtime > binary_mtime
        }
        _ => true,
    };
    if binary_is_stale {
        compile_helper(&source_path, &bin_path)?;
    }

    Ok(bin_path)
}

/// Compiles the Swift helper at `source_path` into an executable at `bin_path`.
///
/// `xcrun swiftc` is tried first; a bare `swiftc` is used only when `xcrun`
/// itself could not be run. On success the binary's permissions are set to
/// `0o700`.
///
/// # Errors
///
/// Returns the I/O error from running the fallback compiler, an error
/// carrying the compiler's stderr when it exits unsuccessfully, or the error
/// from setting the binary's permissions.
#[cfg(target_os = "macos")]
fn compile_helper(source_path: &Path, bin_path: &Path) -> Result<()> {
    // Both attempts share the same flags; only the launcher differs.
    let run_compiler = |program: &str, leading_args: &[&str]| {
        Command::new(program)
            .args(leading_args)
            .arg("-parse-as-library")
            .arg("-O")
            .arg(source_path)
            .arg("-o")
            .arg(bin_path)
            .output()
    };

    let output = match run_compiler("xcrun", &["swiftc"]) {
        Ok(output) => output,
        Err(_) => run_compiler("swiftc", &[])?,
    };

    if !output.status.success() {
        let stderr = String::from_utf8_lossy(&output.stderr);
        return Err(MinutesError::Io(std::io::Error::other(format!(
            "failed to compile apple speech helper: {}",
            stderr
        ))));
    }

    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let owner_only = fs::Permissions::from_mode(0o700);
        fs::set_permissions(bin_path, owner_only)?;
    }
    Ok(())
}

/// Returns the lowercase string label for a [`ContentType`] variant.
fn content_type_label(content_type: ContentType) -> &'static str {
    match content_type {
        ContentType::Dictation => "dictation",
        ContentType::Meeting => "meeting",
        ContentType::Memo => "memo",
    }
}

/// Extracts the primary language subtag from a locale identifier.
///
/// The locale is trimmed and cut at the first `_` or `-`, so `"en_US"` and
/// `"pt-BR"` yield `"en"` and `"pt"`. Returns `None` when the locale is blank
/// or starts with a separator.
fn locale_language_hint(locale: &str) -> Option<String> {
    locale
        .trim()
        .split(|ch: char| matches!(ch, '_' | '-'))
        .next()
        .map(str::trim)
        .filter(|primary| !primary.is_empty())
        .map(str::to_string)
}

// Unit tests for the pure helpers in this module: locale hints, benchmark
// corpus loading, word-error-rate scoring, and term presence checks.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::Path;
    use tempfile::tempdir;

    // Both `_` and `-` separated locales reduce to the part before the first
    // separator; an empty locale gives no hint.
    #[test]
    fn locale_language_hint_uses_primary_subtag() {
        assert_eq!(locale_language_hint("en_US"), Some("en".into()));
        assert_eq!(locale_language_hint("pt-BR"), Some("pt".into()));
        assert_eq!(locale_language_hint(""), None);
    }

    // Bare language codes pass through (trimmed), a hyphenated locale comes
    // back with an underscore, and blank or missing input gives no hint.
    #[test]
    fn live_locale_hint_preserves_plain_language_codes() {
        assert_eq!(live_locale_hint(Some("en")), Some("en".into()));
        assert_eq!(live_locale_hint(Some(" fr ")), Some("fr".into()));
        assert_eq!(live_locale_hint(Some("pt-BR")), Some("pt_BR".into()));
        assert_eq!(live_locale_hint(Some("")), None);
        assert_eq!(live_locale_hint(None), None);
    }

    // The hypothesis carries a `[0:00]` timestamp prefix, different casing and
    // a stray apostrophe; the score must stay below 0.34 after normalisation.
    #[test]
    fn word_error_rate_normalizes_timestamped_minutes_output() {
        let reference = "Matt and Wesley are reviewing the Minutes Apple speech benchmark.";
        let hypothesis =
            "[0:00] Matt and Wesley are reviewing the Minute's Apple Speech Benchmark.\n";
        let wer = word_error_rate(reference, hypothesis);
        assert!(wer >= 0.0);
        assert!(wer < 0.34);
    }

    // Relative `audioPath` / `referencePath` entries are resolved against the
    // directory containing the corpus file; absolute paths are kept as given.
    #[test]
    fn benchmark_case_paths_resolve_relative_to_corpus_file() {
        let dir = tempdir().unwrap();
        let corpus_dir = dir.path().join("fixtures");
        std::fs::create_dir_all(corpus_dir.join("audio")).unwrap();
        std::fs::create_dir_all(corpus_dir.join("refs")).unwrap();
        let absolute_audio = dir.path().join("absolute.wav");

        // Case 1 uses relative paths; case 2 uses an absolute audio path and
        // supplies required/forbidden term lists instead of a reference.
        let corpus_path = corpus_dir.join("apple-speech-corpus.json");
        std::fs::write(
            &corpus_path,
            serde_json::json!([
                {
                    "id": "case-1",
                    "audioPath": "audio/sample.wav",
                    "contentType": "meeting",
                    "referencePath": "refs/sample.txt"
                },
                {
                    "id": "case-2",
                    "audioPath": absolute_audio,
                    "contentType": "dictation",
                    "requiredTerms": ["Minutes"],
                    "forbiddenTerms": ["Matt Mullenweg"]
                }
            ])
            .to_string(),
        )
        .unwrap();

        let cases = load_benchmark_cases(&corpus_path).unwrap();

        assert_eq!(
            cases[0].audio_path,
            corpus_dir.join(Path::new("audio").join("sample.wav"))
        );
        assert_eq!(
            cases[0].reference_path,
            Some(corpus_dir.join(Path::new("refs").join("sample.txt")))
        );
        assert_eq!(cases[1].audio_path, absolute_audio);
        assert_eq!(cases[1].required_terms, vec!["Minutes"]);
        assert_eq!(cases[1].forbidden_terms, vec!["Matt Mullenweg"]);
    }

    // The hypothesis differs from the reference only by missing full stops, so
    // the punctuation-insensitive score is exactly zero while the
    // punctuation-sensitive one is strictly larger.
    #[test]
    fn punct_insensitive_wer_ignores_terminal_punctuation() {
        let reference = "Minutes benchmark dictation check. Apple speech should handle short form voice notes locally.";
        let hypothesis =
            "Minutes benchmark dictation check Apple speech should handle short form voice notes locally";
        let punct_sensitive = word_error_rate(reference, hypothesis);
        let punct_insensitive = word_error_rate_punct_insensitive(reference, hypothesis);
        assert!(punct_sensitive > punct_insensitive);
        assert_eq!(punct_insensitive, 0.0);
    }

    // `present_terms` serves both the required list (what was found) and the
    // forbidden list (what leaked through); `missing_terms` is its complement.
    #[test]
    fn term_quality_helpers_find_missing_and_forbidden_terms() {
        let text = "Minutes benchmark dictation check mentions Harper.";
        let required = vec!["Minutes".into(), "Apple Speech".into()];
        let forbidden = vec!["Matt Mullenweg".into(), "Harper".into()];

        assert_eq!(present_terms(text, &required), vec!["Minutes"]);
        assert_eq!(missing_terms(text, &required), vec!["Apple Speech"]);
        assert_eq!(present_terms(text, &forbidden), vec!["Harper"]);
    }
}