// vil_audio 0.3.0
//
// VIL Audio Transcription — infrastructure and traits for audio-to-text (I03)
// Documentation
use serde::{Deserialize, Serialize};

/// Full transcript result from audio transcription.
///
/// Bundles the complete text with its time-aligned [`Segment`]s, a language
/// code, and the overall audio length. The type does not enforce any
/// relationship between these fields (for example, that segments are sorted
/// or fit inside `duration_ms`); use [`Transcript::is_ordered`] to check the
/// ordering of `segments`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Transcript {
    /// The full transcribed text.
    pub text: String,
    /// Time-aligned segments.
    ///
    /// May be empty; [`Transcript::avg_confidence`] returns `0.0` in that case.
    pub segments: Vec<Segment>,
    /// Detected language code (the tests in this file use `"en"`).
    pub language: String,
    /// Total audio duration in milliseconds.
    pub duration_ms: u64,
}

/// A time-aligned segment of a transcript.
///
/// Times are unsigned millisecond offsets. Nothing here enforces
/// `start_ms <= end_ms`; [`Segment::duration_ms`] saturates to `0` when the
/// end precedes the start.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Segment {
    /// Start time in milliseconds.
    pub start_ms: u64,
    /// End time in milliseconds.
    pub end_ms: u64,
    /// Transcribed text for this segment.
    pub text: String,
    /// Confidence score (0.0 to 1.0).
    ///
    /// The range is a convention only; the field accepts any `f32`.
    pub confidence: f32,
}

impl Segment {
    /// Length of the segment in milliseconds, i.e. `end_ms - start_ms`.
    ///
    /// The subtraction saturates: a segment whose `end_ms` lies before its
    /// `start_ms` reports `0` instead of underflowing.
    pub fn duration_ms(&self) -> u64 {
        // Both fields are `Copy`, so they can be pulled out of the borrow directly.
        let Self {
            start_ms, end_ms, ..
        } = *self;
        end_ms.saturating_sub(start_ms)
    }
}

impl Transcript {
    /// Report whether the segments are sorted by start time.
    ///
    /// Only the `start_ms` of neighbouring segments is compared. Equal start
    /// times count as ordered, and a transcript with fewer than two segments
    /// is always ordered.
    pub fn is_ordered(&self) -> bool {
        // Pair every segment with its successor.
        let successors = self.segments.iter().skip(1);
        self.segments
            .iter()
            .zip(successors)
            .all(|(prev, next)| prev.start_ms <= next.start_ms)
    }

    /// Arithmetic mean of the per-segment confidence scores.
    ///
    /// Returns `0.0` when there are no segments, which avoids a `0.0 / 0.0`
    /// division.
    pub fn avg_confidence(&self) -> f32 {
        match self.segments.len() {
            0 => 0.0,
            count => {
                let total: f32 = self.segments.iter().map(|seg| seg.confidence).sum();
                total / count as f32
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a `Segment` fixture.
    fn seg(start_ms: u64, end_ms: u64, text: &str, confidence: f32) -> Segment {
        Segment {
            start_ms,
            end_ms,
            text: text.into(),
            confidence,
        }
    }

    /// Wrap `segments` in a `Transcript` whose language is `"en"`.
    fn transcript(text: &str, segments: Vec<Segment>, duration_ms: u64) -> Transcript {
        Transcript {
            text: text.into(),
            segments,
            language: "en".into(),
            duration_ms,
        }
    }

    /// Two well-ordered segments with confidences 0.95 and 0.88.
    fn make_transcript() -> Transcript {
        transcript(
            "Hello world. How are you?",
            vec![
                seg(0, 1000, "Hello world.", 0.95),
                seg(1200, 2500, "How are you?", 0.88),
            ],
            2500,
        )
    }

    #[test]
    fn test_segment_duration() {
        assert_eq!(seg(100, 500, "test", 0.9).duration_ms(), 400);
    }

    #[test]
    fn test_segment_duration_saturates_when_end_precedes_start() {
        // An inverted segment must report 0 rather than underflow.
        assert_eq!(seg(500, 100, "test", 0.9).duration_ms(), 0);
    }

    #[test]
    fn test_transcript_ordered() {
        let t = make_transcript();
        assert!(t.is_ordered());
    }

    #[test]
    fn test_transcript_unordered() {
        let t = transcript(
            "",
            vec![seg(2000, 3000, "b", 0.9), seg(0, 1000, "a", 0.9)],
            3000,
        );
        assert!(!t.is_ordered());
    }

    #[test]
    fn test_transcript_ordered_allows_equal_starts() {
        // The ordering check is non-strict: ties on `start_ms` are accepted.
        let t = transcript(
            "",
            vec![seg(0, 500, "a", 0.9), seg(0, 1000, "b", 0.9)],
            1000,
        );
        assert!(t.is_ordered());
    }

    #[test]
    fn test_transcript_ordered_with_fewer_than_two_segments() {
        assert!(transcript("", Vec::new(), 0).is_ordered());
        assert!(transcript("a", vec![seg(700, 900, "a", 0.9)], 900).is_ordered());
    }

    #[test]
    fn test_transcript_avg_confidence() {
        let t = make_transcript();
        let avg = t.avg_confidence();
        // (0.95 + 0.88) / 2 = 0.915, allowing for f32 rounding.
        assert!((avg - 0.915).abs() < 1e-4, "unexpected average: {}", avg);
    }

    #[test]
    fn test_transcript_avg_confidence_empty_is_zero() {
        // The empty case must short-circuit instead of dividing by zero.
        assert_eq!(transcript("", Vec::new(), 0).avg_confidence(), 0.0);
    }
}