Skip to main content

codetether_agent/tool/
avatar.rs

1//! Avatar Tool - AI digital human video generation via Duix Avatar.
2//!
3//! Connects to the Duix Avatar integration on the Voice API to:
4//! - Generate lip-synced avatar videos from audio or text
5//! - Manage avatar models (upload, list)
6//! - Control the Duix Avatar container (start/stop for GPU sharing)
7//! - Create avatar videos from podcast episodes with optional YouTube upload
8
9use super::{Tool, ToolResult};
10use anyhow::{Context, Result};
11use async_trait::async_trait;
12use serde::Deserialize;
13use serde_json::{Value, json};
14use std::time::Duration;
15
/// Default per-request timeout for the shared HTTP client.
///
/// Avatar video generation can be slow, so the default is a generous 15 minutes.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(15 * 60);
17
/// Returns the base URL of the Voice API, without a trailing slash.
///
/// Reads the `CODETETHER_VOICE_API_URL` environment variable. When the variable is unset,
/// is not valid Unicode, or is blank, the hosted default is returned instead.
///
/// Surrounding whitespace and trailing slashes are stripped because callers build endpoints
/// as `{base}/avatar/...`; a configured `https://host/` would otherwise produce
/// `https://host//avatar/...`, and an empty value would produce a URL with no scheme at all.
fn voice_api_url() -> String {
    const DEFAULT_URL: &str = "https://voice.quantum-forge.io";

    std::env::var("CODETETHER_VOICE_API_URL")
        .ok()
        .map(|raw| raw.trim().trim_end_matches('/').to_string())
        // A blank override is treated the same as no override.
        .filter(|url| !url.is_empty())
        .unwrap_or_else(|| DEFAULT_URL.to_string())
}
22
23pub struct AvatarTool {
24    client: reqwest::Client,
25}
26
27impl Default for AvatarTool {
28    fn default() -> Self {
29        Self::new()
30    }
31}
32
33impl AvatarTool {
34    pub fn new() -> Self {
35        let client = reqwest::Client::builder()
36            .timeout(REQUEST_TIMEOUT)
37            .user_agent("CodeTether-Agent/1.0")
38            .build()
39            .expect("Failed to build HTTP client");
40        Self { client }
41    }
42
43    async fn status(&self) -> Result<ToolResult> {
44        let base = voice_api_url();
45        let url = format!("{base}/avatar/status");
46
47        let resp = self
48            .client
49            .get(&url)
50            .timeout(Duration::from_secs(10))
51            .send()
52            .await
53            .map_err(|e| anyhow::anyhow!("Avatar status check failed: {e}"))?;
54
55        if !resp.status().is_success() {
56            let status = resp.status();
57            let body = resp.text().await.unwrap_or_default();
58            return Ok(ToolResult::error(format!(
59                "Avatar status failed ({status}): {body}"
60            )));
61        }
62
63        let body: Value = resp.json().await.context("Failed to parse response")?;
64        let running = body["container_running"].as_bool().unwrap_or(false);
65        let image_size = body["image_size"].as_str().unwrap_or("unknown");
66        let models = body["models"].as_array().map(|m| m.len()).unwrap_or(0);
67        let gpu_sharing = body["gpu_sharing_enabled"].as_bool().unwrap_or(false);
68
69        Ok(ToolResult::success(format!(
70            "Duix Avatar Status:\n\
71             Container: {}\n\
72             Docker Image: {image_size}\n\
73             Models available: {models}\n\
74             GPU time-sharing: {}",
75            if running { "running" } else { "stopped" },
76            if gpu_sharing { "enabled" } else { "disabled" },
77        ))
78        .with_metadata("container_running", json!(running))
79        .with_metadata("models_count", json!(models)))
80    }
81
82    async fn start(&self) -> Result<ToolResult> {
83        let base = voice_api_url();
84        let url = format!("{base}/avatar/start");
85
86        let resp = self
87            .client
88            .post(&url)
89            .timeout(Duration::from_secs(120))
90            .send()
91            .await
92            .map_err(|e| anyhow::anyhow!("Avatar start failed: {e}"))?;
93
94        let body: Value = resp.json().await.context("Failed to parse response")?;
95        let status = body["status"].as_str().unwrap_or("unknown");
96        let message = body["message"].as_str().unwrap_or("");
97
98        if status == "started" {
99            Ok(ToolResult::success(format!(
100                "Duix Avatar container started. {message}"
101            )))
102        } else {
103            Ok(ToolResult::error(format!(
104                "Failed to start avatar container: {message}"
105            )))
106        }
107    }
108
109    async fn stop(&self) -> Result<ToolResult> {
110        let base = voice_api_url();
111        let url = format!("{base}/avatar/stop");
112
113        let resp = self
114            .client
115            .post(&url)
116            .timeout(Duration::from_secs(30))
117            .send()
118            .await
119            .map_err(|e| anyhow::anyhow!("Avatar stop failed: {e}"))?;
120
121        let body: Value = resp.json().await.context("Failed to parse response")?;
122        let message = body["message"].as_str().unwrap_or("Container stopped");
123
124        Ok(ToolResult::success(message))
125    }
126
127    async fn generate(&self, params: &GenerateParams) -> Result<ToolResult> {
128        let base = voice_api_url();
129        let url = format!("{base}/avatar/generate");
130
131        let mut form = reqwest::multipart::Form::new();
132
133        if let Some(ref text) = params.text {
134            form = form.text("text", text.clone());
135        }
136        if let Some(ref audio_url) = params.audio_url {
137            form = form.text("audio_url", audio_url.clone());
138        }
139        if let Some(ref audio_file) = params.audio_file {
140            let file_path = std::path::Path::new(audio_file);
141            if !file_path.exists() {
142                return Ok(ToolResult::error(format!(
143                    "Audio file not found: {audio_file}"
144                )));
145            }
146            let file_bytes = tokio::fs::read(file_path)
147                .await
148                .context("Failed to read audio file")?;
149            let file_name = file_path
150                .file_name()
151                .unwrap_or_default()
152                .to_string_lossy()
153                .to_string();
154            let part = reqwest::multipart::Part::bytes(file_bytes)
155                .file_name(file_name)
156                .mime_str("audio/wav")?;
157            form = form.part("audio_file", part);
158        }
159        if let Some(ref model) = params.model_video {
160            form = form.text("model_video", model.clone());
161        }
162        if let Some(ref voice_id) = params.voice_id {
163            form = form.text("voice_id", voice_id.clone());
164        }
165
166        let resp = self
167            .client
168            .post(&url)
169            .multipart(form)
170            .send()
171            .await
172            .map_err(|e| anyhow::anyhow!("Avatar generate request failed: {e}"))?;
173
174        if !resp.status().is_success() {
175            let status = resp.status();
176            let body = resp.text().await.unwrap_or_default();
177            return Ok(ToolResult::error(format!(
178                "Avatar generation failed ({status}): {body}"
179            )));
180        }
181
182        let body: Value = resp.json().await.context("Failed to parse response")?;
183
184        if let Some(error) = body["error"].as_str() {
185            return Ok(ToolResult::error(format!(
186                "Avatar generation error: {error}"
187            )));
188        }
189
190        let video_path = body["video_path"].as_str().unwrap_or("unknown");
191        let job_code = body["job_code"].as_str().unwrap_or("unknown");
192
193        Ok(ToolResult::success(format!(
194            "Avatar video generated!\n\
195             Video: {video_path}\n\
196             Job: {job_code}"
197        ))
198        .with_metadata("video_path", json!(video_path))
199        .with_metadata("job_code", json!(job_code)))
200    }
201
202    async fn generate_from_episode(&self, params: &EpisodeAvatarParams) -> Result<ToolResult> {
203        let base = voice_api_url();
204        let url = format!("{base}/avatar/generate-from-episode");
205
206        let mut form = reqwest::multipart::Form::new()
207            .text("podcast_id", params.podcast_id.clone())
208            .text("episode_id", params.episode_id.clone());
209
210        if let Some(ref model) = params.model_video {
211            form = form.text("model_video", model.clone());
212        }
213        if params.upload_youtube {
214            form = form.text("upload_youtube", "true".to_string());
215        }
216        if let Some(ref privacy) = params.privacy_status {
217            form = form.text("privacy_status", privacy.clone());
218        }
219
220        let resp = self
221            .client
222            .post(&url)
223            .multipart(form)
224            .send()
225            .await
226            .map_err(|e| anyhow::anyhow!("Avatar episode generation failed: {e}"))?;
227
228        if !resp.status().is_success() {
229            let status = resp.status();
230            let body = resp.text().await.unwrap_or_default();
231            return Ok(ToolResult::error(format!(
232                "Avatar episode generation failed ({status}): {body}"
233            )));
234        }
235
236        let body: Value = resp.json().await.context("Failed to parse response")?;
237
238        if let Some(error) = body["error"].as_str() {
239            return Ok(ToolResult::error(format!(
240                "Avatar generation error: {error}"
241            )));
242        }
243
244        let video_path = body["video_path"].as_str().unwrap_or("unknown");
245        let title = body["episode_title"].as_str().unwrap_or("unknown");
246        let mut output = format!(
247            "Avatar video generated from episode!\n\
248             Title: {title}\n\
249             Video: {video_path}"
250        );
251
252        if let Some(yt) = body.get("youtube") {
253            let yt_url = yt["url"].as_str().unwrap_or("unknown");
254            let yt_id = yt["video_id"].as_str().unwrap_or("unknown");
255            output.push_str(&format!(
256                "\n\nUploaded to YouTube!\nURL: {yt_url}\nVideo ID: {yt_id}"
257            ));
258        }
259
260        if let Some(yt_err) = body["youtube_error"].as_str() {
261            output.push_str(&format!("\n\nYouTube upload error: {yt_err}"));
262        }
263
264        Ok(ToolResult::success(output)
265            .with_metadata("video_path", json!(video_path))
266            .with_metadata("episode_title", json!(title)))
267    }
268
269    async fn list_models(&self) -> Result<ToolResult> {
270        let base = voice_api_url();
271        let url = format!("{base}/avatar/models");
272
273        let resp = self
274            .client
275            .get(&url)
276            .send()
277            .await
278            .map_err(|e| anyhow::anyhow!("List models failed: {e}"))?;
279
280        if !resp.status().is_success() {
281            let status = resp.status();
282            let body = resp.text().await.unwrap_or_default();
283            return Ok(ToolResult::error(format!(
284                "List models failed ({status}): {body}"
285            )));
286        }
287
288        let body: Value = resp.json().await.context("Failed to parse response")?;
289        let models = body["models"].as_array();
290
291        match models {
292            Some(models) if !models.is_empty() => {
293                let mut output = format!("Avatar models ({}):\n\n", models.len());
294                for m in models {
295                    let name = m["name"].as_str().unwrap_or("?");
296                    let path = m["path"].as_str().unwrap_or("?");
297                    let size = m["size_bytes"].as_u64().unwrap_or(0);
298                    let size_mb = size as f64 / 1_048_576.0;
299                    output.push_str(&format!("- {name} ({size_mb:.1}MB)\n  Path: {path}\n"));
300                }
301                Ok(ToolResult::success(output).with_metadata("count", json!(models.len())))
302            }
303            _ => Ok(ToolResult::success(
304                "No avatar models found. Upload a silent video of yourself \
305                 using action 'upload_model' to create an AI clone.",
306            )),
307        }
308    }
309
310    async fn upload_model(&self, params: &UploadModelParams) -> Result<ToolResult> {
311        let base = voice_api_url();
312        let url = format!("{base}/avatar/upload-model");
313
314        let file_path = std::path::Path::new(&params.file_path);
315        if !file_path.exists() {
316            return Ok(ToolResult::error(format!(
317                "Video file not found: {}",
318                params.file_path
319            )));
320        }
321
322        let file_bytes = tokio::fs::read(file_path)
323            .await
324            .context("Failed to read video file")?;
325        let file_name = file_path
326            .file_name()
327            .unwrap_or_default()
328            .to_string_lossy()
329            .to_string();
330        let part = reqwest::multipart::Part::bytes(file_bytes)
331            .file_name(file_name)
332            .mime_str("video/mp4")?;
333
334        let form = reqwest::multipart::Form::new()
335            .text("name", params.name.clone())
336            .part("video", part);
337
338        let resp = self
339            .client
340            .post(&url)
341            .multipart(form)
342            .send()
343            .await
344            .map_err(|e| anyhow::anyhow!("Upload model failed: {e}"))?;
345
346        if !resp.status().is_success() {
347            let status = resp.status();
348            let body = resp.text().await.unwrap_or_default();
349            return Ok(ToolResult::error(format!(
350                "Upload model failed ({status}): {body}"
351            )));
352        }
353
354        let body: Value = resp.json().await.context("Failed to parse response")?;
355        let model_path = body["model_path"].as_str().unwrap_or("unknown");
356        let size = body["size_bytes"].as_u64().unwrap_or(0);
357        let size_mb = size as f64 / 1_048_576.0;
358
359        Ok(ToolResult::success(format!(
360            "Model uploaded!\n\
361             Name: {}\n\
362             Path: {model_path}\n\
363             Size: {size_mb:.1}MB\n\n\
364             You can now use this model for avatar video generation.",
365            params.name
366        ))
367        .with_metadata("model_path", json!(model_path)))
368    }
369}
370
371#[derive(Deserialize)]
372struct Params {
373    action: String,
374    #[serde(default)]
375    text: Option<String>,
376    #[serde(default)]
377    audio_url: Option<String>,
378    #[serde(default)]
379    audio_file: Option<String>,
380    #[serde(default)]
381    model_video: Option<String>,
382    #[serde(default)]
383    voice_id: Option<String>,
384    #[serde(default)]
385    podcast_id: Option<String>,
386    #[serde(default)]
387    episode_id: Option<String>,
388    #[serde(default)]
389    upload_youtube: Option<bool>,
390    #[serde(default)]
391    privacy_status: Option<String>,
392    #[serde(default)]
393    name: Option<String>,
394    #[serde(default)]
395    file_path: Option<String>,
396}
397
398#[derive(Deserialize)]
399struct GenerateParams {
400    text: Option<String>,
401    audio_url: Option<String>,
402    audio_file: Option<String>,
403    model_video: Option<String>,
404    voice_id: Option<String>,
405}
406
407#[derive(Deserialize)]
408struct EpisodeAvatarParams {
409    podcast_id: String,
410    episode_id: String,
411    model_video: Option<String>,
412    upload_youtube: bool,
413    privacy_status: Option<String>,
414}
415
416#[derive(Deserialize)]
417struct UploadModelParams {
418    name: String,
419    file_path: String,
420}
421
422#[async_trait]
423impl Tool for AvatarTool {
424    fn id(&self) -> &str {
425        "avatar"
426    }
427    fn name(&self) -> &str {
428        "Avatar"
429    }
430    fn description(&self) -> &str {
431        "AI digital human video generation using Duix Avatar. Create lip-synced avatar videos \
432         from audio or text. Actions: status (check service), start/stop (manage GPU container), \
433         generate (create avatar video from audio), generate_from_episode (podcast → avatar video → YouTube), \
434         list_models (show available avatar models), upload_model (upload a silent video for cloning). \
435         Supports the full pipeline: text → speech → avatar video → YouTube."
436    }
437    fn parameters(&self) -> Value {
438        json!({
439            "type": "object",
440            "properties": {
441                "action": {
442                    "type": "string",
443                    "enum": ["status", "start", "stop", "generate", "generate_from_episode", "list_models", "upload_model"],
444                    "description": "Action to perform"
445                },
446                "text": {
447                    "type": "string",
448                    "description": "Text to convert to speech then avatar video (for 'generate')"
449                },
450                "audio_url": {
451                    "type": "string",
452                    "description": "Path to audio file on server (for 'generate')"
453                },
454                "audio_file": {
455                    "type": "string",
456                    "description": "Local path to audio file to upload (for 'generate')"
457                },
458                "model_video": {
459                    "type": "string",
460                    "description": "Path to model video for lip-sync (optional, uses default)"
461                },
462                "voice_id": {
463                    "type": "string",
464                    "description": "Voice ID for TTS (default: Riley 960f89fc)"
465                },
466                "podcast_id": {
467                    "type": "string",
468                    "description": "Podcast ID (for 'generate_from_episode')"
469                },
470                "episode_id": {
471                    "type": "string",
472                    "description": "Episode ID (for 'generate_from_episode')"
473                },
474                "upload_youtube": {
475                    "type": "boolean",
476                    "description": "Upload generated video to YouTube (for 'generate_from_episode')",
477                    "default": false
478                },
479                "privacy_status": {
480                    "type": "string",
481                    "enum": ["public", "unlisted", "private"],
482                    "description": "YouTube privacy status (default: unlisted)"
483                },
484                "name": {
485                    "type": "string",
486                    "description": "Model name (for 'upload_model')"
487                },
488                "file_path": {
489                    "type": "string",
490                    "description": "Path to video file (for 'upload_model')"
491                }
492            },
493            "required": ["action"]
494        })
495    }
496
497    async fn execute(&self, params: Value) -> Result<ToolResult> {
498        let p: Params = serde_json::from_value(params).context("Invalid params")?;
499
500        match p.action.as_str() {
501            "status" => self.status().await,
502            "start" => self.start().await,
503            "stop" => self.stop().await,
504            "generate" => {
505                self.generate(&GenerateParams {
506                    text: p.text,
507                    audio_url: p.audio_url,
508                    audio_file: p.audio_file,
509                    model_video: p.model_video,
510                    voice_id: p.voice_id,
511                })
512                .await
513            }
514            "generate_from_episode" => {
515                let podcast_id = match p.podcast_id {
516                    Some(id) if !id.trim().is_empty() => id,
517                    _ => {
518                        return Ok(ToolResult::structured_error(
519                            "MISSING_PARAM",
520                            "avatar",
521                            "'podcast_id' is required for generate_from_episode",
522                            Some(vec!["podcast_id"]),
523                            Some(
524                                json!({"action": "generate_from_episode", "podcast_id": "abc123", "episode_id": "xyz789"}),
525                            ),
526                        ));
527                    }
528                };
529                let episode_id = match p.episode_id {
530                    Some(id) if !id.trim().is_empty() => id,
531                    _ => {
532                        return Ok(ToolResult::structured_error(
533                            "MISSING_PARAM",
534                            "avatar",
535                            "'episode_id' is required for generate_from_episode",
536                            Some(vec!["episode_id"]),
537                            Some(
538                                json!({"action": "generate_from_episode", "podcast_id": "abc123", "episode_id": "xyz789"}),
539                            ),
540                        ));
541                    }
542                };
543                self.generate_from_episode(&EpisodeAvatarParams {
544                    podcast_id,
545                    episode_id,
546                    model_video: p.model_video,
547                    upload_youtube: p.upload_youtube.unwrap_or(false),
548                    privacy_status: p.privacy_status,
549                })
550                .await
551            }
552            "list_models" => self.list_models().await,
553            "upload_model" => {
554                let name = match p.name {
555                    Some(n) if !n.trim().is_empty() => n,
556                    _ => {
557                        return Ok(ToolResult::structured_error(
558                            "MISSING_PARAM",
559                            "avatar",
560                            "'name' is required for upload_model",
561                            Some(vec!["name"]),
562                            Some(
563                                json!({"action": "upload_model", "name": "Riley", "file_path": "/path/to/video.mp4"}),
564                            ),
565                        ));
566                    }
567                };
568                let file_path = match p.file_path {
569                    Some(f) if !f.trim().is_empty() => f,
570                    _ => {
571                        return Ok(ToolResult::structured_error(
572                            "MISSING_PARAM",
573                            "avatar",
574                            "'file_path' is required for upload_model",
575                            Some(vec!["file_path"]),
576                            Some(
577                                json!({"action": "upload_model", "name": "Riley", "file_path": "/path/to/video.mp4"}),
578                            ),
579                        ));
580                    }
581                };
582                self.upload_model(&UploadModelParams { name, file_path })
583                    .await
584            }
585            other => Ok(ToolResult::structured_error(
586                "INVALID_ACTION",
587                "avatar",
588                &format!(
589                    "Unknown action '{other}'. Use: status, start, stop, generate, generate_from_episode, list_models, upload_model"
590                ),
591                None,
592                Some(json!({"action": "status"})),
593            )),
594        }
595    }
596}