1use super::{Tool, ToolResult};
10use anyhow::{Context, Result};
11use async_trait::async_trait;
12use serde::Deserialize;
13use serde_json::{Value, json};
14use std::time::Duration;
15
/// Default timeout (900 seconds) configured on the shared HTTP client.
/// Individual requests may override it with their own, shorter limit.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(900);

/// Returns the base URL of the voice API, without a trailing slash.
///
/// Reads `CODETETHER_VOICE_API_URL`. Surrounding whitespace and trailing `/`
/// characters are stripped so that callers can append `/avatar/...` paths
/// without producing `//`. When the variable is unset, not valid Unicode, or
/// empty after trimming, the built-in default endpoint is returned.
fn voice_api_url() -> String {
    const DEFAULT_URL: &str = "https://voice.quantum-forge.io";

    std::env::var("CODETETHER_VOICE_API_URL")
        .ok()
        .map(|raw| raw.trim().trim_end_matches('/').to_string())
        // An empty override would produce scheme-less URLs; treat it as unset.
        .filter(|url| !url.is_empty())
        .unwrap_or_else(|| DEFAULT_URL.to_string())
}
22
23pub struct AvatarTool {
24 client: reqwest::Client,
25}
26
27impl Default for AvatarTool {
28 fn default() -> Self {
29 Self::new()
30 }
31}
32
33impl AvatarTool {
34 pub fn new() -> Self {
35 let client = reqwest::Client::builder()
36 .timeout(REQUEST_TIMEOUT)
37 .user_agent("CodeTether-Agent/1.0")
38 .build()
39 .expect("Failed to build HTTP client");
40 Self { client }
41 }
42
43 async fn status(&self) -> Result<ToolResult> {
44 let base = voice_api_url();
45 let url = format!("{base}/avatar/status");
46
47 let resp = self
48 .client
49 .get(&url)
50 .timeout(Duration::from_secs(10))
51 .send()
52 .await
53 .map_err(|e| anyhow::anyhow!("Avatar status check failed: {e}"))?;
54
55 if !resp.status().is_success() {
56 let status = resp.status();
57 let body = resp.text().await.unwrap_or_default();
58 return Ok(ToolResult::error(format!(
59 "Avatar status failed ({status}): {body}"
60 )));
61 }
62
63 let body: Value = resp.json().await.context("Failed to parse response")?;
64 let running = body["container_running"].as_bool().unwrap_or(false);
65 let image_size = body["image_size"].as_str().unwrap_or("unknown");
66 let models = body["models"].as_array().map(|m| m.len()).unwrap_or(0);
67 let gpu_sharing = body["gpu_sharing_enabled"].as_bool().unwrap_or(false);
68
69 Ok(ToolResult::success(format!(
70 "Duix Avatar Status:\n\
71 Container: {}\n\
72 Docker Image: {image_size}\n\
73 Models available: {models}\n\
74 GPU time-sharing: {}",
75 if running { "running" } else { "stopped" },
76 if gpu_sharing { "enabled" } else { "disabled" },
77 ))
78 .with_metadata("container_running", json!(running))
79 .with_metadata("models_count", json!(models)))
80 }
81
82 async fn start(&self) -> Result<ToolResult> {
83 let base = voice_api_url();
84 let url = format!("{base}/avatar/start");
85
86 let resp = self
87 .client
88 .post(&url)
89 .timeout(Duration::from_secs(120))
90 .send()
91 .await
92 .map_err(|e| anyhow::anyhow!("Avatar start failed: {e}"))?;
93
94 let body: Value = resp.json().await.context("Failed to parse response")?;
95 let status = body["status"].as_str().unwrap_or("unknown");
96 let message = body["message"].as_str().unwrap_or("");
97
98 if status == "started" {
99 Ok(ToolResult::success(format!(
100 "Duix Avatar container started. {message}"
101 )))
102 } else {
103 Ok(ToolResult::error(format!(
104 "Failed to start avatar container: {message}"
105 )))
106 }
107 }
108
109 async fn stop(&self) -> Result<ToolResult> {
110 let base = voice_api_url();
111 let url = format!("{base}/avatar/stop");
112
113 let resp = self
114 .client
115 .post(&url)
116 .timeout(Duration::from_secs(30))
117 .send()
118 .await
119 .map_err(|e| anyhow::anyhow!("Avatar stop failed: {e}"))?;
120
121 let body: Value = resp.json().await.context("Failed to parse response")?;
122 let message = body["message"].as_str().unwrap_or("Container stopped");
123
124 Ok(ToolResult::success(message))
125 }
126
127 async fn generate(&self, params: &GenerateParams) -> Result<ToolResult> {
128 let base = voice_api_url();
129 let url = format!("{base}/avatar/generate");
130
131 let mut form = reqwest::multipart::Form::new();
132
133 if let Some(ref text) = params.text {
134 form = form.text("text", text.clone());
135 }
136 if let Some(ref audio_url) = params.audio_url {
137 form = form.text("audio_url", audio_url.clone());
138 }
139 if let Some(ref audio_file) = params.audio_file {
140 let file_path = std::path::Path::new(audio_file);
141 if !file_path.exists() {
142 return Ok(ToolResult::error(format!(
143 "Audio file not found: {audio_file}"
144 )));
145 }
146 let file_bytes = tokio::fs::read(file_path)
147 .await
148 .context("Failed to read audio file")?;
149 let file_name = file_path
150 .file_name()
151 .unwrap_or_default()
152 .to_string_lossy()
153 .to_string();
154 let part = reqwest::multipart::Part::bytes(file_bytes)
155 .file_name(file_name)
156 .mime_str("audio/wav")?;
157 form = form.part("audio_file", part);
158 }
159 if let Some(ref model) = params.model_video {
160 form = form.text("model_video", model.clone());
161 }
162 if let Some(ref voice_id) = params.voice_id {
163 form = form.text("voice_id", voice_id.clone());
164 }
165
166 let resp = self
167 .client
168 .post(&url)
169 .multipart(form)
170 .send()
171 .await
172 .map_err(|e| anyhow::anyhow!("Avatar generate request failed: {e}"))?;
173
174 if !resp.status().is_success() {
175 let status = resp.status();
176 let body = resp.text().await.unwrap_or_default();
177 return Ok(ToolResult::error(format!(
178 "Avatar generation failed ({status}): {body}"
179 )));
180 }
181
182 let body: Value = resp.json().await.context("Failed to parse response")?;
183
184 if let Some(error) = body["error"].as_str() {
185 return Ok(ToolResult::error(format!(
186 "Avatar generation error: {error}"
187 )));
188 }
189
190 let video_path = body["video_path"].as_str().unwrap_or("unknown");
191 let job_code = body["job_code"].as_str().unwrap_or("unknown");
192
193 Ok(ToolResult::success(format!(
194 "Avatar video generated!\n\
195 Video: {video_path}\n\
196 Job: {job_code}"
197 ))
198 .with_metadata("video_path", json!(video_path))
199 .with_metadata("job_code", json!(job_code)))
200 }
201
202 async fn generate_from_episode(&self, params: &EpisodeAvatarParams) -> Result<ToolResult> {
203 let base = voice_api_url();
204 let url = format!("{base}/avatar/generate-from-episode");
205
206 let mut form = reqwest::multipart::Form::new()
207 .text("podcast_id", params.podcast_id.clone())
208 .text("episode_id", params.episode_id.clone());
209
210 if let Some(ref model) = params.model_video {
211 form = form.text("model_video", model.clone());
212 }
213 if params.upload_youtube {
214 form = form.text("upload_youtube", "true".to_string());
215 }
216 if let Some(ref privacy) = params.privacy_status {
217 form = form.text("privacy_status", privacy.clone());
218 }
219
220 let resp = self
221 .client
222 .post(&url)
223 .multipart(form)
224 .send()
225 .await
226 .map_err(|e| anyhow::anyhow!("Avatar episode generation failed: {e}"))?;
227
228 if !resp.status().is_success() {
229 let status = resp.status();
230 let body = resp.text().await.unwrap_or_default();
231 return Ok(ToolResult::error(format!(
232 "Avatar episode generation failed ({status}): {body}"
233 )));
234 }
235
236 let body: Value = resp.json().await.context("Failed to parse response")?;
237
238 if let Some(error) = body["error"].as_str() {
239 return Ok(ToolResult::error(format!(
240 "Avatar generation error: {error}"
241 )));
242 }
243
244 let video_path = body["video_path"].as_str().unwrap_or("unknown");
245 let title = body["episode_title"].as_str().unwrap_or("unknown");
246 let mut output = format!(
247 "Avatar video generated from episode!\n\
248 Title: {title}\n\
249 Video: {video_path}"
250 );
251
252 if let Some(yt) = body.get("youtube") {
253 let yt_url = yt["url"].as_str().unwrap_or("unknown");
254 let yt_id = yt["video_id"].as_str().unwrap_or("unknown");
255 output.push_str(&format!(
256 "\n\nUploaded to YouTube!\nURL: {yt_url}\nVideo ID: {yt_id}"
257 ));
258 }
259
260 if let Some(yt_err) = body["youtube_error"].as_str() {
261 output.push_str(&format!("\n\nYouTube upload error: {yt_err}"));
262 }
263
264 Ok(ToolResult::success(output)
265 .with_metadata("video_path", json!(video_path))
266 .with_metadata("episode_title", json!(title)))
267 }
268
269 async fn list_models(&self) -> Result<ToolResult> {
270 let base = voice_api_url();
271 let url = format!("{base}/avatar/models");
272
273 let resp = self
274 .client
275 .get(&url)
276 .send()
277 .await
278 .map_err(|e| anyhow::anyhow!("List models failed: {e}"))?;
279
280 if !resp.status().is_success() {
281 let status = resp.status();
282 let body = resp.text().await.unwrap_or_default();
283 return Ok(ToolResult::error(format!(
284 "List models failed ({status}): {body}"
285 )));
286 }
287
288 let body: Value = resp.json().await.context("Failed to parse response")?;
289 let models = body["models"].as_array();
290
291 match models {
292 Some(models) if !models.is_empty() => {
293 let mut output = format!("Avatar models ({}):\n\n", models.len());
294 for m in models {
295 let name = m["name"].as_str().unwrap_or("?");
296 let path = m["path"].as_str().unwrap_or("?");
297 let size = m["size_bytes"].as_u64().unwrap_or(0);
298 let size_mb = size as f64 / 1_048_576.0;
299 output.push_str(&format!("- {name} ({size_mb:.1}MB)\n Path: {path}\n"));
300 }
301 Ok(ToolResult::success(output).with_metadata("count", json!(models.len())))
302 }
303 _ => Ok(ToolResult::success(
304 "No avatar models found. Upload a silent video of yourself \
305 using action 'upload_model' to create an AI clone.",
306 )),
307 }
308 }
309
310 async fn upload_model(&self, params: &UploadModelParams) -> Result<ToolResult> {
311 let base = voice_api_url();
312 let url = format!("{base}/avatar/upload-model");
313
314 let file_path = std::path::Path::new(¶ms.file_path);
315 if !file_path.exists() {
316 return Ok(ToolResult::error(format!(
317 "Video file not found: {}",
318 params.file_path
319 )));
320 }
321
322 let file_bytes = tokio::fs::read(file_path)
323 .await
324 .context("Failed to read video file")?;
325 let file_name = file_path
326 .file_name()
327 .unwrap_or_default()
328 .to_string_lossy()
329 .to_string();
330 let part = reqwest::multipart::Part::bytes(file_bytes)
331 .file_name(file_name)
332 .mime_str("video/mp4")?;
333
334 let form = reqwest::multipart::Form::new()
335 .text("name", params.name.clone())
336 .part("video", part);
337
338 let resp = self
339 .client
340 .post(&url)
341 .multipart(form)
342 .send()
343 .await
344 .map_err(|e| anyhow::anyhow!("Upload model failed: {e}"))?;
345
346 if !resp.status().is_success() {
347 let status = resp.status();
348 let body = resp.text().await.unwrap_or_default();
349 return Ok(ToolResult::error(format!(
350 "Upload model failed ({status}): {body}"
351 )));
352 }
353
354 let body: Value = resp.json().await.context("Failed to parse response")?;
355 let model_path = body["model_path"].as_str().unwrap_or("unknown");
356 let size = body["size_bytes"].as_u64().unwrap_or(0);
357 let size_mb = size as f64 / 1_048_576.0;
358
359 Ok(ToolResult::success(format!(
360 "Model uploaded!\n\
361 Name: {}\n\
362 Path: {model_path}\n\
363 Size: {size_mb:.1}MB\n\n\
364 You can now use this model for avatar video generation.",
365 params.name
366 ))
367 .with_metadata("model_path", json!(model_path)))
368 }
369}
370
371#[derive(Deserialize)]
372struct Params {
373 action: String,
374 #[serde(default)]
375 text: Option<String>,
376 #[serde(default)]
377 audio_url: Option<String>,
378 #[serde(default)]
379 audio_file: Option<String>,
380 #[serde(default)]
381 model_video: Option<String>,
382 #[serde(default)]
383 voice_id: Option<String>,
384 #[serde(default)]
385 podcast_id: Option<String>,
386 #[serde(default)]
387 episode_id: Option<String>,
388 #[serde(default)]
389 upload_youtube: Option<bool>,
390 #[serde(default)]
391 privacy_status: Option<String>,
392 #[serde(default)]
393 name: Option<String>,
394 #[serde(default)]
395 file_path: Option<String>,
396}
397
/// Arguments for the `generate` action; each field that is set is sent as a
/// multipart form field of the same name.
#[derive(Deserialize)]
struct GenerateParams {
    /// Text sent as the `text` form field.
    text: Option<String>,
    /// Value sent unchanged as the `audio_url` form field.
    audio_url: Option<String>,
    /// Local path of an audio file whose contents are uploaded as the
    /// `audio_file` part.
    audio_file: Option<String>,
    /// Value sent as the `model_video` form field.
    model_video: Option<String>,
    /// Value sent as the `voice_id` form field.
    voice_id: Option<String>,
}
406
/// Arguments for the `generate_from_episode` action.
#[derive(Deserialize)]
struct EpisodeAvatarParams {
    /// Podcast identifier; always sent as the `podcast_id` form field.
    podcast_id: String,
    /// Episode identifier; always sent as the `episode_id` form field.
    episode_id: String,
    /// Value sent as the `model_video` form field when set.
    model_video: Option<String>,
    /// When true, `upload_youtube=true` is added to the form; when false the
    /// field is omitted.
    upload_youtube: bool,
    /// Value sent as the `privacy_status` form field when set.
    privacy_status: Option<String>,
}
415
/// Arguments for the `upload_model` action.
#[derive(Deserialize)]
struct UploadModelParams {
    /// Model name, sent as the `name` form field.
    name: String,
    /// Local path of the video file whose contents are uploaded as the
    /// `video` part.
    file_path: String,
}
421
422#[async_trait]
423impl Tool for AvatarTool {
424 fn id(&self) -> &str {
425 "avatar"
426 }
427 fn name(&self) -> &str {
428 "Avatar"
429 }
430 fn description(&self) -> &str {
431 "AI digital human video generation using Duix Avatar. Create lip-synced avatar videos \
432 from audio or text. Actions: status (check service), start/stop (manage GPU container), \
433 generate (create avatar video from audio), generate_from_episode (podcast → avatar video → YouTube), \
434 list_models (show available avatar models), upload_model (upload a silent video for cloning). \
435 Supports the full pipeline: text → speech → avatar video → YouTube."
436 }
/// Returns the JSON Schema describing the arguments accepted by `execute`.
///
/// Only `action` is listed as required; the remaining properties apply to
/// specific actions, as noted in their descriptions.
fn parameters(&self) -> Value {
    json!({
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["status", "start", "stop", "generate", "generate_from_episode", "list_models", "upload_model"],
                "description": "Action to perform"
            },
            "text": {
                "type": "string",
                "description": "Text to convert to speech then avatar video (for 'generate')"
            },
            "audio_url": {
                "type": "string",
                "description": "Path to audio file on server (for 'generate')"
            },
            "audio_file": {
                "type": "string",
                "description": "Local path to audio file to upload (for 'generate')"
            },
            "model_video": {
                "type": "string",
                "description": "Path to model video for lip-sync (optional, uses default)"
            },
            "voice_id": {
                "type": "string",
                "description": "Voice ID for TTS (default: Riley 960f89fc)"
            },
            "podcast_id": {
                "type": "string",
                "description": "Podcast ID (for 'generate_from_episode')"
            },
            "episode_id": {
                "type": "string",
                "description": "Episode ID (for 'generate_from_episode')"
            },
            "upload_youtube": {
                "type": "boolean",
                "description": "Upload generated video to YouTube (for 'generate_from_episode')",
                "default": false
            },
            "privacy_status": {
                "type": "string",
                "enum": ["public", "unlisted", "private"],
                "description": "YouTube privacy status (default: unlisted)"
            },
            "name": {
                "type": "string",
                "description": "Model name (for 'upload_model')"
            },
            "file_path": {
                "type": "string",
                "description": "Path to video file (for 'upload_model')"
            }
        },
        "required": ["action"]
    })
}
496
497 async fn execute(&self, params: Value) -> Result<ToolResult> {
498 let p: Params = serde_json::from_value(params).context("Invalid params")?;
499
500 match p.action.as_str() {
501 "status" => self.status().await,
502 "start" => self.start().await,
503 "stop" => self.stop().await,
504 "generate" => {
505 self.generate(&GenerateParams {
506 text: p.text,
507 audio_url: p.audio_url,
508 audio_file: p.audio_file,
509 model_video: p.model_video,
510 voice_id: p.voice_id,
511 })
512 .await
513 }
514 "generate_from_episode" => {
515 let podcast_id = match p.podcast_id {
516 Some(id) if !id.trim().is_empty() => id,
517 _ => {
518 return Ok(ToolResult::structured_error(
519 "MISSING_PARAM",
520 "avatar",
521 "'podcast_id' is required for generate_from_episode",
522 Some(vec!["podcast_id"]),
523 Some(
524 json!({"action": "generate_from_episode", "podcast_id": "abc123", "episode_id": "xyz789"}),
525 ),
526 ));
527 }
528 };
529 let episode_id = match p.episode_id {
530 Some(id) if !id.trim().is_empty() => id,
531 _ => {
532 return Ok(ToolResult::structured_error(
533 "MISSING_PARAM",
534 "avatar",
535 "'episode_id' is required for generate_from_episode",
536 Some(vec!["episode_id"]),
537 Some(
538 json!({"action": "generate_from_episode", "podcast_id": "abc123", "episode_id": "xyz789"}),
539 ),
540 ));
541 }
542 };
543 self.generate_from_episode(&EpisodeAvatarParams {
544 podcast_id,
545 episode_id,
546 model_video: p.model_video,
547 upload_youtube: p.upload_youtube.unwrap_or(false),
548 privacy_status: p.privacy_status,
549 })
550 .await
551 }
552 "list_models" => self.list_models().await,
553 "upload_model" => {
554 let name = match p.name {
555 Some(n) if !n.trim().is_empty() => n,
556 _ => {
557 return Ok(ToolResult::structured_error(
558 "MISSING_PARAM",
559 "avatar",
560 "'name' is required for upload_model",
561 Some(vec!["name"]),
562 Some(
563 json!({"action": "upload_model", "name": "Riley", "file_path": "/path/to/video.mp4"}),
564 ),
565 ));
566 }
567 };
568 let file_path = match p.file_path {
569 Some(f) if !f.trim().is_empty() => f,
570 _ => {
571 return Ok(ToolResult::structured_error(
572 "MISSING_PARAM",
573 "avatar",
574 "'file_path' is required for upload_model",
575 Some(vec!["file_path"]),
576 Some(
577 json!({"action": "upload_model", "name": "Riley", "file_path": "/path/to/video.mp4"}),
578 ),
579 ));
580 }
581 };
582 self.upload_model(&UploadModelParams { name, file_path })
583 .await
584 }
585 other => Ok(ToolResult::structured_error(
586 "INVALID_ACTION",
587 "avatar",
588 &format!(
589 "Unknown action '{other}'. Use: status, start, stop, generate, generate_from_episode, list_models, upload_model"
590 ),
591 None,
592 Some(json!({"action": "status"})),
593 )),
594 }
595 }
596}