aethershell 0.3.1

The world's first multi-agent shell with typed functional pipelines and multi-modal AI
Documentation
/* AetherShell TUI Multimodal Demo
   Launch with: ae tui
   
   This demo showcases multimodal AI capabilities:
   - Image analysis
   - Audio processing
   - Video understanding
   - Mixed media conversations
*/

/* Image analysis.
   Calls `ai` with the model spec "openai:gpt-4o" and a record containing
   a text `prompt` and a `media` list with a single image path.
   The path is a placeholder sample; point it at a real file before running. */
ai "openai:gpt-4o" {
  prompt: "What's in this image? Describe it in detail.",
  media: ["./demos/sample_image.jpg"]
}

/* Audio transcription and analysis.
   Same call shape as the image example, but the `media` list holds one MP3
   path and the prompt asks for a transcript plus a summary.
   NOTE(review): confirm that the selected model/provider accepts audio
   input through `media`; this script does not show how audio is handled. */
ai "openai:gpt-4o" {
  prompt: "Transcribe this audio and summarize the main points.",
  media: ["./demos/sample_audio.mp3"]
}

/* Video understanding.
   Passes one MP4 path in `media` and asks for a description of the key actions.
   NOTE(review): confirm that the selected model/provider accepts video
   input through `media`; this script does not show how video is handled. */
ai "openai:gpt-4o" {
  prompt: "What's happening in this video? Describe the key actions.",
  media: ["./demos/sample_video.mp4"]
}

/* Multiple-image comparison.
   Both image paths are passed together in one `media` list, so a single
   request carries the two images that the prompt asks the model to compare. */
ai "openai:gpt-4o" {
  prompt: "Compare these two images and describe the differences.",
  media: ["./demos/image1.jpg", "./demos/image2.jpg"]
}

/* Mixed media: a text prompt plus one image (diagram) and one audio file
   (spoken explanation) in the same `media` list.
   NOTE(review): this relies on the model/provider accepting both image and
   audio attachments in one request; confirm before relying on it. */
ai "openai:gpt-4o" {
  prompt: "Based on this diagram and the audio explanation, summarize the concept.",
  media: ["./demos/diagram.png", "./demos/explanation.mp3"]
}

/* The TUI provides:
   - Media preview pane (images, audio waveforms, video thumbnails)
   - Drag-and-drop media attachment
   - Media gallery navigation
   - Format support: JPG, PNG, MP3, WAV, MP4, WebM
   - Base64 encoding for API transmission
   - Progress indicators for large files
*/

/* Closing messages: tell the user how to launch the TUI and remind them
   that the media paths used in this demo are placeholders. */
print "TUI Multimodal Demo Ready! Launch with: ae tui"
print "Note: Replace sample media paths with your actual files"