use anyhow::Result;
use std::sync::Arc;
use std::time::{Duration, SystemTime};
use tokio::sync::RwLock;
use tokio::time;
use vtt_rs::{Config, TranscriptionEvent, TranscriptionService};
/// One transcribed piece of text together with the moment it was recorded
/// by the agent.
#[derive(Clone)]
struct TimestampedTranscription {
    /// The transcribed text.
    text: String,
    /// Wall-clock time at which the entry was created.
    timestamp: SystemTime,
    /// Identifier of the chunk the text came from.
    // Stored on every entry but not read anywhere in the code shown here,
    // hence the lint suppression.
    #[allow(dead_code)]
    chunk_id: usize,
}
/// Keeps a rolling, time-bounded history of transcriptions behind an async
/// read/write lock.
struct StreamingAgent {
    /// Recorded transcriptions, in insertion order.
    history: Arc<RwLock<Vec<TimestampedTranscription>>>,
    /// Maximum age an entry may reach before it is pruned from `history`.
    context_window: Duration,
}
impl StreamingAgent {
    /// Creates an agent with an empty history whose context window spans
    /// `context_window_secs` seconds.
    fn new(context_window_secs: u64) -> Self {
        Self {
            history: Arc::new(RwLock::new(Vec::new())),
            context_window: Duration::from_secs(context_window_secs),
        }
    }

    /// Reports whether an entry stamped `timestamp` is still inside the
    /// context window when observed at `now`.
    ///
    /// Equivalent to `timestamp >= now - context_window`, but phrased as an
    /// age comparison so that no `SystemTime` subtraction can panic.
    fn is_within_window(&self, timestamp: SystemTime, now: SystemTime) -> bool {
        match now.duration_since(timestamp) {
            Ok(age) => age <= self.context_window,
            // `timestamp` is later than `now` (for example the wall clock was
            // stepped backwards); the entry cannot be stale, so keep it.
            Err(_) => true,
        }
    }

    /// Records `text` for chunk `chunk_id`, stamped with the current time,
    /// then prunes entries that have aged out of the context window.
    ///
    /// Empty `text` is ignored and leaves the history untouched.
    async fn add_transcription(&self, chunk_id: usize, text: String) {
        if text.is_empty() {
            return;
        }
        let transcription = TimestampedTranscription {
            text,
            timestamp: SystemTime::now(),
            chunk_id,
        };
        // Push and prune under one write guard so readers never observe the
        // intermediate state.
        let mut history = self.history.write().await;
        history.push(transcription);
        self.cleanup_old_entries(&mut history).await;
    }

    /// Removes every entry of `history` that is older than the context
    /// window, preserving the order of the remaining entries.
    // Contains no await point; left `async` so existing `.await` call sites
    // keep compiling.
    async fn cleanup_old_entries(&self, history: &mut Vec<TimestampedTranscription>) {
        let now = SystemTime::now();
        history.retain(|t| self.is_within_window(t.timestamp, now));
    }

    /// Returns a snapshot (clone) of the entries that are inside the context
    /// window at the time of the call.
    ///
    /// Pruning only happens when a transcription is added, so the stored
    /// history may still hold stale entries during quiet periods; they are
    /// filtered out here rather than being reported as recent.
    async fn get_recent_context(&self) -> Vec<TimestampedTranscription> {
        let now = SystemTime::now();
        let history = self.history.read().await;
        history
            .iter()
            .filter(|t| self.is_within_window(t.timestamp, now))
            .cloned()
            .collect()
    }

    /// Prints a short activity summary of the recent context to stdout:
    /// chunk count, total whitespace-separated words, and the mean number of
    /// words per chunk, followed by an "active" or "sparse" verdict.
    ///
    /// Prints nothing when the recent context is empty.
    async fn analyze_patterns(&self) {
        let context = self.get_recent_context().await;
        if context.is_empty() {
            return;
        }
        let total_words: usize = context
            .iter()
            .map(|t| t.text.split_whitespace().count())
            .sum();
        // Divide in floating point so the one-decimal figure printed below is
        // the real mean rather than a truncated integer. `context` is known
        // to be non-empty here; the casts only lose precision for counts far
        // beyond anything a bounded window can hold.
        let avg_words_per_chunk = total_words as f32 / context.len() as f32;
        println!(
            "[Analysis] Recent activity: {} chunks, {} total words, avg {:.1} words/chunk",
            context.len(),
            total_words,
            avg_words_per_chunk
        );
        if avg_words_per_chunk > 5.0 {
            println!("[Analysis] Active conversation detected");
        } else {
            println!("[Analysis] Sparse audio detected");
        }
    }

    /// Returns the text of every recent entry that contains `keyword`,
    /// compared case-insensitively, in history order.
    ///
    /// An empty `keyword` matches every entry.
    async fn search_context(&self, keyword: &str) -> Vec<String> {
        // Lowercase the needle once; the haystack is lowercased per entry, so
        // both sides of the comparison must be normalized.
        let needle = keyword.to_lowercase();
        self.get_recent_context()
            .await
            .into_iter()
            .filter(|t| t.text.to_lowercase().contains(&needle))
            // The snapshot is already owned, so move the text out instead of
            // cloning it a second time.
            .map(|t| t.text)
            .collect()
    }
}
#[tokio::main]
async fn main() -> Result<()> {
println!("=== Streaming AI Agent ===\n");
let api_key = std::env::var("OPENAI_API_KEY").expect("Set OPENAI_API_KEY environment variable");
let config = Config {
chunk_duration_secs: 2,
model: "whisper-1".to_string(),
endpoint: "https://api.openai.com/v1/audio/transcriptions".to_string(),
out_file: None,
};
let agent = Arc::new(StreamingAgent::new(30));
let mut service = TranscriptionService::new(config, api_key)?;
let (mut receiver, _stream) = service.start().await?;
println!("Agent is listening...");
println!("Context window: 30 seconds");
println!("Press Ctrl+C to stop.\n");
let analysis_agent = agent.clone();
tokio::spawn(async move {
let mut interval = time::interval(Duration::from_secs(10));
loop {
interval.tick().await;
analysis_agent.analyze_patterns().await;
}
});
while let Some(event) = receiver.recv().await {
match event {
TranscriptionEvent::Transcription { chunk_id, text } => {
if !text.is_empty() {
println!("[{}] {}", chunk_id, text);
agent.add_transcription(chunk_id, text.clone()).await;
if text.to_lowercase().contains("important") {
println!("[Alert] Important keyword detected!");
let related = agent.search_context("important").await;
println!("[Alert] Related context: {:?}", related);
}
}
}
TranscriptionEvent::Error { chunk_id, error } => {
eprintln!("[Error] Chunk {}: {}", chunk_id, error);
}
}
}
Ok(())
}