seedance 0.1.3

Generate video with ByteDance Seedance 2.0 from the terminal. Agent-friendly.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
use clap::{Args, Parser, Subcommand, ValueEnum};

// Root command-line definition for the `seedance` binary (clap `Parser` derive).
//
// * `version` with no value lets clap take the version from the crate metadata.
// * `about` is the short description; `long_about` and `after_long_help`
//   (the HELP_FOOTER constant) are only shown in the long `--help` output.
// * The `///` comments on fields are rendered by clap as user-facing help text,
//   so treat them as UI strings; maintainer notes use plain `//` comments.
#[derive(Parser)]
#[command(
    name = "seedance",
    version,
    about = "Generate video with ByteDance Seedance 2.0 from the terminal.",
    long_about = "Generate video with ByteDance Seedance 2.0 via the BytePlus ModelArk API.

Supports text-to-video, image-to-video (first / first+last / up to 9 reference images),
reference videos, reference audio, and multimodal mixes. Use time-coded prompts like
`[Image 1] ... [Video 1] ... [Audio 1] ...` and `[0-4s]: shot description` for multi-shot control.",
    after_long_help = HELP_FOOTER,
)]
pub struct Cli {
    /// Force JSON output even in a terminal
    // `global = true`: the flag is also accepted after any subcommand.
    #[arg(long, global = true)]
    pub json: bool,

    /// Suppress informational output
    // `global = true`: the flag is also accepted after any subcommand.
    #[arg(long, global = true)]
    pub quiet: bool,

    // The selected subcommand together with its parsed arguments.
    #[command(subcommand)]
    pub command: Commands,
}

/// Footer text attached to the long help output via `after_long_help`:
/// a list of usage tips followed by worked command examples.
///
/// Kept as a raw string literal so the quoted example prompts need no escaping.
const HELP_FOOTER: &str = r#"Tips:
  * Run `seedance agent-info | jq` for the full capability manifest
  * Get an API key from https://console.byteplus.com/ark, then save it with:
      seedance config set api-key ark-xxxxxxxx (stored at chmod 600, never echoed)
    or export SEEDANCE_API_KEY / ARK_API_KEY
  * Reference files: images and audio can be local paths (base64-encoded inline) OR URLs
  * Videos must be URLs -- the API does not accept base64 for video
  * Audio alone is not allowed -- Seedance requires at least one image or video alongside audio
  * Known quirk: uploading audio mutates lyrics. Workaround: render a silent MP4 with the audio
    baked in, then pass it as --video (credit: @simeonnz via @MrDavids1)
  * Use --wait to block until the task finishes and download the result in one command
  * Real human faces in references are blocked -- use faces from previously generated Seedance videos
  * Default output dir: ~/Documents/seedance/<task-id>.mp4 (override with -o /path/to/file.mp4)
  * For a consistent person: `seedance character-sheet <photo>` (uses nanaban), then
    pass the resulting PNG as --image. Bypasses Seedance's single-face upload block.
  * For exact music/dialogue: `seedance audio-to-video <audio>` (uses ffmpeg) -> host the
    resulting mp4 -> pass as --video. Preserves lyrics that --audio would otherwise rewrite.

Examples:
  seedance generate --prompt "A cat yawns at the camera" --wait --output cat.mp4
    Text-to-video, blocks until the mp4 lands on disk

  seedance generate --prompt "[Image 1] the boy waves" --image boy.png --duration 8 --wait -o out.mp4
    Single reference image + prompt, 8 seconds, wait and download

  seedance generate --first-frame first.png --last-frame last.png --prompt "morph between them"
    First+last frame mode -- returns a task id, poll separately

  seedance generate --prompt "..." --image a.png --image b.png --video ref.mp4 --fast --wait -o out.mp4
    Multimodal reference-to-video using the fast tier

  seedance status cgt-20260416-abcd1234
    Poll a task; prints video_url when succeeded

  seedance download cgt-20260416-abcd1234 --output final.mp4
    Download the video for a completed task

  seedance doctor
    Verify API key + base URL reachability before running a real generation"#;

// Output resolution selectable on the command line (`480p` / `720p`).
//
// The serde name of every variant is pinned explicitly so the serialized form
// is identical to the clap value name and to `Resolution::as_api`. Relying on
// `rename_all = "lowercase"` alone would serialize `P480` as "p480", which is
// neither what the CLI accepts nor what `as_api` returns.
#[derive(Clone, Copy, ValueEnum, serde::Serialize, Debug)]
#[serde(rename_all = "lowercase")]
pub enum Resolution {
    #[value(name = "480p")]
    #[serde(rename = "480p")]
    P480,
    #[value(name = "720p")]
    #[serde(rename = "720p")]
    P720,
}

impl Resolution {
    pub fn as_api(&self) -> &'static str {
        match self {
            Self::P480 => "480p",
            Self::P720 => "720p",
        }
    }
}

// Output aspect ratio selectable on the command line.
//
// The serde name of every ratio variant is pinned explicitly so the serialized
// form is identical to the clap value name and to `Ratio::as_api`. Relying on
// `rename_all = "kebab-case"` alone would serialize e.g. `Sixteen9` as
// "sixteen9" and `TwentyOne9` as "twenty-one9" instead of "16:9" / "21:9".
// `Adaptive` keeps the container-level rule, which already yields "adaptive".
#[derive(Clone, Copy, ValueEnum, serde::Serialize, Debug)]
#[serde(rename_all = "kebab-case")]
pub enum Ratio {
    #[value(name = "16:9")]
    #[serde(rename = "16:9")]
    Sixteen9,
    #[value(name = "4:3")]
    #[serde(rename = "4:3")]
    Four3,
    #[value(name = "1:1")]
    #[serde(rename = "1:1")]
    One1,
    #[value(name = "3:4")]
    #[serde(rename = "3:4")]
    Three4,
    #[value(name = "9:16")]
    #[serde(rename = "9:16")]
    Nine16,
    #[value(name = "21:9")]
    #[serde(rename = "21:9")]
    TwentyOne9,
    Adaptive,
}

impl Ratio {
    pub fn as_api(&self) -> &'static str {
        match self {
            Self::Sixteen9 => "16:9",
            Self::Four3 => "4:3",
            Self::One1 => "1:1",
            Self::Three4 => "3:4",
            Self::Nine16 => "9:16",
            Self::TwentyOne9 => "21:9",
            Self::Adaptive => "adaptive",
        }
    }
}

// Top-level subcommands of the `seedance` CLI (clap `Subcommand` derive).
//
// The `///` doc comments below are rendered by clap as user-facing help text,
// so they are part of the program's output; maintainer notes use plain `//`
// comments. Variant names become kebab-case subcommand names through clap's
// default renaming (e.g. `CharacterSheet` -> `character-sheet`, the spelling
// used in HELP_FOOTER).
#[derive(Subcommand)]
pub enum Commands {
    /// Create a video generation task
    // Arguments live in `GenerateArgs`; presumably boxed to keep this enum small.
    #[command(visible_alias = "gen")]
    Generate(Box<GenerateArgs>),

    /// Retrieve a video generation task by id
    #[command(visible_alias = "get")]
    Status {
        /// Task id (e.g. cgt-20260416-abcd1234)
        id: String,
        /// API key override (else SEEDANCE_API_KEY / ARK_API_KEY / config)
        // `env` makes clap fall back to SEEDANCE_API_KEY when the flag is absent;
        // `hide_env_values` keeps the variable's value out of the help output.
        // The ARK_API_KEY / config fallbacks named above are not part of this
        // definition -- presumably resolved by the command implementation.
        #[arg(long, env = "SEEDANCE_API_KEY", hide_env_values = true)]
        api_key: Option<String>,
    },

    /// Download the generated video for a completed task
    Download {
        /// Task id
        id: String,
        /// Output file path (default: <id>.mp4 in current dir)
        // NOTE(review): HELP_FOOTER advertises ~/Documents/seedance/<task-id>.mp4 as
        // the default output location; confirm which default the implementation uses.
        #[arg(long, short = 'o')]
        output: Option<std::path::PathBuf>,
        /// API key override
        // Same SEEDANCE_API_KEY env fallback as on `status`.
        #[arg(long, env = "SEEDANCE_API_KEY", hide_env_values = true)]
        api_key: Option<String>,
    },

    /// Cancel a queued task (only possible while status=queued)
    #[command(visible_alias = "rm")]
    Cancel {
        /// Task id
        id: String,
        /// API key override
        // Same SEEDANCE_API_KEY env fallback as on `status`.
        #[arg(long, env = "SEEDANCE_API_KEY", hide_env_values = true)]
        api_key: Option<String>,
    },

    /// Build a 9-angle character reference sheet from a single photo (uses nanaban/Nano Banana Pro).
    /// The resulting grid can be passed to `generate --image` to keep a specific person
    /// consistent across Seedance shots -- works around ByteDance's single-face-block.
    CharacterSheet {
        /// Input photo of the subject (path or URL)
        // Kept as a `String` because the value may be either a path or a URL.
        input: String,
        /// Output PNG path (default: ~/Documents/seedance/character-sheet-<hash>.png)
        #[arg(short = 'o', long)]
        output: Option<std::path::PathBuf>,
        /// Extra styling hints to append to the grid prompt
        /// (e.g. "wearing a white hoodie", "studio three-point lighting")
        #[arg(long)]
        style: Option<String>,
        /// Grid size (9-angle 3x3 or 4-angle 2x2)
        // Any `u8` parses here; restricting to 4 or 9 is not enforced by this
        // definition -- presumably validated by the command implementation.
        #[arg(long, default_value_t = 9)]
        angles: u8,
    },

    /// Wrap an audio file inside a silent mp4 so it can be fed as a `--video` reference.
    /// Workaround for Seedance 2.0's "reference audio mutates lyrics" quirk
    /// (credit: @simeonnz via @MrDavids1). Uses ffmpeg under the hood.
    /// Output is hard-capped at 14.5s (BytePlus rejects >15.2s reference videos).
    AudioToVideo {
        /// Input audio file (wav/mp3/m4a/etc)
        input: std::path::PathBuf,
        /// Output mp4 path (default: <input>.silent.mp4)
        #[arg(short = 'o', long)]
        output: Option<std::path::PathBuf>,
        /// Background color: "black" or "white" (default: black)
        // Free-form string at the parser level; the two documented values are
        // not enforced here.
        #[arg(long, default_value = "black")]
        background: String,
        /// Output resolution: 480 | 720 (default: 480 — smallest viable)
        // Any `u32` parses here; the documented 480/720 choice is not enforced
        // by this definition.
        #[arg(long, default_value_t = 480)]
        height: u32,
        /// After wrapping, upload to tmpfiles.org and print the hosted URL
        /// (ready to paste into `--video`). tmpfiles.org is confirmed reachable
        /// by the BytePlus fetcher; catbox.moe is blocklisted.
        #[arg(long)]
        upload: bool,
    },

    /// Apply the empirical face-filter-bypass recipe to a portrait so it can be
    /// used as `--first-frame` without tripping BytePlus's real-face detector.
    /// Default recipe (heavy grain + desat) PASSES the filter while keeping
    /// colour and identity; --bw swaps in the pure grayscale variant that also
    /// passes. Uses ImageMagick. Discovered empirically 2026-04-16.
    PrepFace {
        /// Input portrait (path or URL)
        // NOTE(review): typed as `PathBuf` although the help text also allows a
        // URL (`CharacterSheet::input` uses `String` for the same wording);
        // confirm the implementation really handles URLs passed this way.
        input: std::path::PathBuf,
        /// Output PNG (default: ~/Documents/seedance/prep-face-<hash>.png)
        #[arg(short = 'o', long)]
        output: Option<std::path::PathBuf>,
        /// Use the pure black-and-white + grain variant instead of colour-with-grain
        #[arg(long)]
        bw: bool,
        /// Output width in px (default: 512 -- the proven passing resolution)
        #[arg(long, default_value_t = 512)]
        width: u32,
    },

    /// Upload a local file to a public HTTPS URL via tmpfiles.org (default host)
    /// and print the direct-download URL ready to pass to `--video` / `--image`.
    /// tmpfiles.org is BytePlus-fetcher-compatible; catbox.moe is blocklisted.
    Upload {
        /// Local file to upload
        input: std::path::PathBuf,
    },

    /// List available Seedance model ids
    #[command(visible_alias = "ls")]
    Models,

    /// Check API key, base URL, and dependency health
    Doctor,

    /// Machine-readable capability manifest
    #[command(visible_alias = "info")]
    AgentInfo,

    /// Manage skill file installation for AI agent platforms
    Skill {
        // Nested subcommand: `seedance skill <action>`.
        #[command(subcommand)]
        action: SkillAction,
    },

    /// Manage configuration
    Config {
        // Nested subcommand: `seedance config <action>`.
        #[command(subcommand)]
        action: ConfigAction,
    },

    /// Self-update from GitHub Releases
    Update {
        /// Check only, don't install
        #[arg(long)]
        check: bool,
    },
}

// Arguments of `seedance generate` (carried boxed inside `Commands::Generate`).
//
// The `///` doc comments are rendered by clap as user-facing help text, so they
// are part of the program's output; maintainer notes use plain `//` comments.
// Limits quoted in the help text (counts, durations, lengths) are not enforced
// by these attributes -- presumably validated by the command implementation.
#[derive(Args, Debug)]
pub struct GenerateArgs {
    /// Text prompt. Use [Image N] / [Video N] / [Audio N] to reference inputs,
    /// and time codes like `[0-4s]: shot description` for multi-shot control.
    #[arg(long, short = 'p')]
    pub prompt: Option<String>,

    /// Reference image (local path or URL). Repeatable, up to 9.
    /// Role is `reference_image`. Use --first-frame / --last-frame for those modes.
    // The flag is spelled `--image`, but the clap argument id stays `images`
    // (the field name) -- that id is what `conflicts_with = "images"` refers to.
    #[arg(long = "image", short = 'i', value_name = "PATH|URL")]
    pub images: Vec<String>,

    /// Image used as the first frame (role=first_frame)
    // Mutually exclusive with `--image`.
    #[arg(long, value_name = "PATH|URL", conflicts_with = "images")]
    pub first_frame: Option<String>,

    /// Image used as the last frame (role=last_frame). Requires --first-frame.
    // clap rejects `--last-frame` without `--first-frame`, and together with `--image`.
    #[arg(long, value_name = "PATH|URL", requires = "first_frame", conflicts_with = "images")]
    pub last_frame: Option<String>,

    /// Reference video URL (role=reference_video). Repeatable, up to 3, total <=15s.
    /// Local paths are NOT supported by the API -- upload to a URL first.
    #[arg(long = "video", short = 'v', value_name = "URL")]
    pub videos: Vec<String>,

    /// Reference audio (local path or URL, wav/mp3). Repeatable, up to 3, total <=15s.
    /// Cannot be the only reference -- requires at least one image or video.
    #[arg(long = "audio", short = 'a', value_name = "PATH|URL")]
    pub audio: Vec<String>,

    /// Video duration in seconds. [4,15] or -1 for auto (Seedance 2.0).
    // `allow_hyphen_values` lets `-1` be read as the value instead of as a flag.
    #[arg(long, short = 'd', default_value_t = 5, allow_hyphen_values = true)]
    pub duration: i32,

    /// Output resolution. Seedance 2.0 does not support 1080p.
    #[arg(long, short = 'r', value_enum, default_value = "720p")]
    pub resolution: Resolution,

    /// Output aspect ratio
    #[arg(long, value_enum, default_value = "adaptive")]
    pub ratio: Ratio,

    /// Seed for reproducibility. -1 = random.
    // `allow_hyphen_values` lets `-1` be read as the value instead of as a flag.
    #[arg(long, default_value_t = -1, allow_hyphen_values = true)]
    pub seed: i64,

    /// Generate audio synchronized with the video (default)
    // NOTE(review): this is a set-true flag whose default is already `true`, so
    // the field appears to parse as `true` in every case; `--audio-sync` only
    // matters through `overrides_with`, which cancels `--no-audio-sync` on the
    // same command line. The effective setting presumably comes from
    // `no_audio_sync` -- confirm in the consumer.
    #[arg(long = "audio-sync", default_value_t = true, overrides_with = "no_audio_sync")]
    pub audio_sync: bool,

    /// Output a silent video
    #[arg(long = "no-audio-sync", default_value_t = false)]
    pub no_audio_sync: bool,

    /// Add a ModelArk watermark to the output
    #[arg(long)]
    pub watermark: bool,

    /// Use the Seedance 2.0 Fast tier (lower latency + cost, slight quality tradeoff)
    // Mutually exclusive with an explicit `--model`.
    #[arg(long, conflicts_with = "model")]
    pub fast: bool,

    /// Override model id (default: dreamina-seedance-2-0-260128, or the fast variant with --fast)
    // No default is set here; `None` means the default is chosen elsewhere.
    #[arg(long)]
    pub model: Option<String>,

    /// Callback URL the API hits on status change (optional)
    #[arg(long, value_name = "URL")]
    pub callback_url: Option<String>,

    /// Hashed end-user id for abuse tracking (optional, <=64 ASCII chars)
    #[arg(long, value_name = "ID")]
    pub safety_identifier: Option<String>,

    /// Block until the task finishes, then optionally download the video
    #[arg(long, short = 'w')]
    pub wait: bool,

    /// Output file path (implies --wait). Defaults to <id>.mp4 when --wait is set alone.
    // The "implies --wait" behaviour is not expressed in these attributes;
    // presumably handled by the command implementation.
    #[arg(long, short = 'o', value_name = "PATH")]
    pub output: Option<std::path::PathBuf>,

    /// Poll interval in seconds when --wait is set
    #[arg(long, default_value_t = 5)]
    pub poll_interval: u64,

    /// Maximum wait in seconds when --wait is set (0 = no limit)
    #[arg(long, default_value_t = 900)]
    pub timeout: u64,

    /// API key override (else SEEDANCE_API_KEY / ARK_API_KEY / config)
    // `env` makes clap fall back to SEEDANCE_API_KEY when the flag is absent;
    // `hide_env_values` keeps the variable's value out of the help output.
    // The ARK_API_KEY / config fallbacks are not part of this definition.
    #[arg(long, env = "SEEDANCE_API_KEY", hide_env_values = true)]
    pub api_key: Option<String>,

    /// Override the duplicate-generation guard. Without this, identical deterministic
    /// requests fired within 10 minutes are rejected so agent retries don't double-spend.
    #[arg(long)]
    pub force: bool,
}

// Actions of the nested `seedance skill <action>` subcommand
// (referenced from `Commands::Skill`). Neither action takes arguments.
// The `///` comments are rendered by clap as user-facing help text.
#[derive(Subcommand)]
pub enum SkillAction {
    /// Write skill file to all detected agent platforms
    Install,
    /// Check which platforms have the skill installed
    Status,
}

// Actions of the nested `seedance config <action>` subcommand
// (referenced from `Commands::Config`).
// The `///` comments are rendered by clap as user-facing help text.
#[derive(Subcommand)]
pub enum ConfigAction {
    /// Display effective merged configuration
    Show,
    /// Print configuration file path
    Path,
    /// Write a value into the TOML config file (api-key, base-url, model)
    Set {
        /// Which setting to update
        #[arg(value_enum)]
        key: ConfigKey,
        /// New value
        value: String,
    },
    /// Remove a value from the TOML config file
    Unset {
        // Help text added so `config unset --help` describes the positional
        // argument, mirroring the `key` argument of `Set`.
        /// Which setting to remove
        #[arg(value_enum)]
        key: ConfigKey,
    },
}

// Settings that `config set` / `config unset` can address.
//
// With `rename_all = "kebab-case"` the command-line spellings are `api-key`,
// `base-url` and `model`; `ConfigKey::as_str` returns the snake_case names.
// The `///` comments on the variants are picked up by clap as the help text
// of each possible value.
#[derive(Clone, Copy, ValueEnum, Debug)]
#[value(rename_all = "kebab-case")]
pub enum ConfigKey {
    /// BytePlus ModelArk API key (stored locally, never echoed in `config show`)
    ApiKey,
    /// API base URL (override if BytePlus publishes a new region)
    BaseUrl,
    /// Default model id
    Model,
}

impl ConfigKey {
    pub fn as_str(&self) -> &'static str {
        match self {
            Self::ApiKey => "api_key",
            Self::BaseUrl => "base_url",
            Self::Model => "model",
        }
    }
}