//! Deployment scaffolding for serving Realizar models: generates Docker,
//! AWS Lambda (SAM), Kubernetes, Fly.io, and Cloudflare Workers artifacts.
#![cfg(feature = "native")]

use crate::ansi_colors::Colorize;
use std::path::{Path, PathBuf};
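
/// Deployment targets for `batuta deploy`; each variant generates the
/// configuration files needed to serve a Realizar model on that platform.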
#[derive(Debug, Clone, clap::Subcommand)]
pub enum DeployCommand {
    /// Generate a Dockerfile for serving the model
    Docker {
        model: String,
        #[arg(long, short = 'o', default_value = ".")]
        output: PathBuf,
        #[arg(long, default_value = "rust:slim")]
        base_image: String,
        #[arg(long, default_value = "8080")]
        port: u16,
        #[arg(long, default_value = "true")]
        multi_stage: bool,
    },
    /// Generate an AWS Lambda deployment, optionally with a SAM template
    Lambda {
        model: String,
        #[arg(long, short = 'o', default_value = ".")]
        output: PathBuf,
        #[arg(long, default_value = "1024")]
        memory: u32,
        #[arg(long, default_value = "30")]
        timeout: u32,
        #[arg(long)]
        sam: bool,
    },
    /// Generate Kubernetes manifests (Deployment, Service, optional HPA)
    K8s {
        model: String,
        #[arg(long, short = 'o', default_value = ".")]
        output: PathBuf,
        #[arg(long, default_value = "1")]
        replicas: u32,
        #[arg(long, default_value = "default")]
        namespace: String,
        #[arg(long)]
        helm: bool,
        #[arg(long)]
        hpa: bool,
    },
    /// Generate a Fly.io fly.toml configuration
    Fly {
        model: String,
        #[arg(long, short = 'o', default_value = ".")]
        output: PathBuf,
        #[arg(long)]
        app: Option<String>,
        #[arg(long, default_value = "iad")]
        region: String,
    },
    /// Generate a Cloudflare Workers wrangler.toml configuration
    Cloudflare {
        model: String,
        #[arg(long, short = 'o', default_value = ".")]
        output: PathBuf,
        #[arg(long)]
        name: Option<String>,
    },
}
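
/// Dispatch a parsed `DeployCommand` to its platform-specific generator.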
pub fn cmd_deploy(command: DeployCommand) -> anyhow::Result<()> {
    match command {
        DeployCommand::Docker { model, output, base_image, port, multi_stage } => {
            cmd_deploy_docker(&model, &output, &base_image, port, multi_stage)?;
        }
        DeployCommand::Lambda { model, output, memory, timeout, sam } => {
            cmd_deploy_lambda(&model, &output, memory, timeout, sam)?;
        }
        DeployCommand::K8s { model, output, replicas, namespace, helm, hpa } => {
            cmd_deploy_k8s(&model, &output, replicas, &namespace, helm, hpa)?;
        }
        DeployCommand::Fly { model, output, app, region } => {
            cmd_deploy_fly(&model, &output, app.as_deref(), &region)?;
        }
        DeployCommand::Cloudflare { model, output, name } => {
            cmd_deploy_cloudflare(&model, &output, name.as_deref())?;
        }
    }
    Ok(())
}
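
/// Write a `Dockerfile` (single- or multi-stage) into `output` for serving
/// `model` with `realizar serve` on the given port.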
fn cmd_deploy_docker(
    model: &str,
    output: &Path,
    base_image: &str,
    port: u16,
    multi_stage: bool,
) -> anyhow::Result<()> {
    println!("{}", "🐳 Generating Docker Deployment".bright_cyan().bold());
    println!();
    println!("{} Model: {}", "•".bright_blue(), model.cyan());
    println!("{} Output: {}", "•".bright_blue(), output.display());
    println!("{} Base image: {}", "•".bright_blue(), base_image.cyan());
    println!("{} Port: {}", "•".bright_blue(), port);
    println!(
        "{} Multi-stage: {}",
        "•".bright_blue(),
        if multi_stage { "yes".green() } else { "no".dimmed() }
    );
    println!();

    let dockerfile = if multi_stage {
        format!(
            r#"# Multi-stage Dockerfile for Realizar model serving
# Generated by batuta deploy docker

# Build stage
FROM rust:latest AS builder
WORKDIR /app

# Install dependencies
RUN cargo install realizar

# Runtime stage
FROM {base_image}
WORKDIR /app

# Copy binary from builder
COPY --from=builder /usr/local/cargo/bin/realizar /usr/local/bin/

# Copy model (if local)
# COPY model.gguf /app/model.gguf

# Expose port
EXPOSE {port}

# Health check (assumes curl is available in the runtime image)
HEALTHCHECK --interval=30s --timeout=3s \
    CMD curl -f http://localhost:{port}/health || exit 1

# Run server
ENV MODEL_REF="{model}"
CMD ["realizar", "serve", "--host", "0.0.0.0", "--port", "{port}"]
"#
        )
    } else {
        format!(
            r#"# Dockerfile for Realizar model serving
# Generated by batuta deploy docker

FROM {base_image}
WORKDIR /app

# Install realizar
RUN cargo install realizar

# Copy model (if local)
# COPY model.gguf /app/model.gguf

EXPOSE {port}
ENV MODEL_REF="{model}"
CMD ["realizar", "serve", "--host", "0.0.0.0", "--port", "{port}"]
"#
        )
    };

    let dockerfile_path = output.join("Dockerfile");
    std::fs::write(&dockerfile_path, dockerfile)?;
    println!("{} Generated: {}", "✓".bright_green(), dockerfile_path.display());
    println!();
    println!("{}", "Build and run:".bright_yellow());
    println!(" docker build -t my-model-server .");
    println!(" docker run -p {}:{} my-model-server", port, port);
    Ok(())
}
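
/// Write an AWS Lambda deployment into `output`; when `sam` is set, a SAM
/// `template.yaml` targeting the `provided.al2` runtime on arm64 is generated.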
fn cmd_deploy_lambda(
    model: &str,
    output: &Path,
    memory: u32,
    timeout: u32,
    sam: bool,
) -> anyhow::Result<()> {
    println!("{}", "λ Generating Lambda Deployment".bright_cyan().bold());
    println!();
    println!("{} Model: {}", "•".bright_blue(), model.cyan());
    println!("{} Output: {}", "•".bright_blue(), output.display());
    println!("{} Memory: {} MB", "•".bright_blue(), memory);
    println!("{} Timeout: {} seconds", "•".bright_blue(), timeout);
    println!(
        "{} SAM template: {}",
        "•".bright_blue(),
        if sam { "yes".green() } else { "no".dimmed() }
    );
    println!();

    if sam {
        let template = format!(
            r#"AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: Realizar model serving Lambda
# Generated by batuta deploy lambda

Globals:
  Function:
    Timeout: {timeout}
    MemorySize: {memory}
    Runtime: provided.al2
    Architectures:
      - arm64

Resources:
  ModelFunction:
    Type: AWS::Serverless::Function
    Properties:
      Handler: bootstrap
      CodeUri: .
      Description: Realizar model inference
      Events:
        Inference:
          Type: Api
          Properties:
            Path: /v1/chat/completions
            Method: post
        Health:
          Type: Api
          Properties:
            Path: /health
            Method: get
      Environment:
        Variables:
          MODEL_REF: "{model}"

Outputs:
  ApiEndpoint:
    Description: API Gateway endpoint URL
    Value: !Sub "https://${{ServerlessRestApi}}.execute-api.${{AWS::Region}}.amazonaws.com/Prod/"
"#
        );
        let template_path = output.join("template.yaml");
        std::fs::write(&template_path, template)?;
        println!("{} Generated: {}", "✓".bright_green(), template_path.display());
    }

    println!();
    println!("{}", "Build for Lambda:".bright_yellow());
    println!(" cargo lambda build --release --arm64");
    if sam {
        println!();
        println!("{}", "Deploy with SAM:".bright_yellow());
        println!(" sam deploy --guided");
    }
    Ok(())
}
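
/// Write Kubernetes manifests (Deployment + Service, optional HPA) into
/// `output`. Helm chart generation is not implemented yet, so the `helm`
/// flag is accepted but currently unused.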
fn cmd_deploy_k8s(
    model: &str,
    output: &Path,
    replicas: u32,
    namespace: &str,
    _helm: bool,
    hpa: bool,
) -> anyhow::Result<()> {
    println!("{}", "☸ Generating Kubernetes Deployment".bright_cyan().bold());
    println!();
    println!("{} Model: {}", "•".bright_blue(), model.cyan());
    println!("{} Output: {}", "•".bright_blue(), output.display());
    println!("{} Replicas: {}", "•".bright_blue(), replicas);
    println!("{} Namespace: {}", "•".bright_blue(), namespace.cyan());
    println!(
        "{} HPA: {}",
        "•".bright_blue(),
        if hpa { "enabled".green() } else { "disabled".dimmed() }
    );
    println!();

    let deployment = format!(
        r#"apiVersion: apps/v1
kind: Deployment
metadata:
  name: realizar-model-server
  namespace: {namespace}
spec:
  replicas: {replicas}
  selector:
    matchLabels:
      app: realizar-model-server
  template:
    metadata:
      labels:
        app: realizar-model-server
    spec:
      containers:
        - name: realizar
          image: realizar:latest
          ports:
            - containerPort: 8080
          env:
            - name: MODEL_REF
              value: "{model}"
          resources:
            requests:
              memory: "1Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "2"
          livenessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            httpGet:
              path: /health
              port: 8080
            initialDelaySeconds: 5
            periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
  name: realizar-model-server
  namespace: {namespace}
spec:
  selector:
    app: realizar-model-server
  ports:
    - port: 80
      targetPort: 8080
  type: ClusterIP
"#
    );
    let deployment_path = output.join("deployment.yaml");
    std::fs::write(&deployment_path, deployment)?;
    println!("{} Generated: {}", "✓".bright_green(), deployment_path.display());

    if hpa {
        let hpa_manifest = format!(
            r"apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: realizar-model-server-hpa
  namespace: {namespace}
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: realizar-model-server
  minReplicas: {replicas}
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
"
        );
        let hpa_path = output.join("hpa.yaml");
        std::fs::write(&hpa_path, hpa_manifest)?;
        println!("{} Generated: {}", "✓".bright_green(), hpa_path.display());
    }

    println!();
    println!("{}", "Deploy:".bright_yellow());
    println!(" kubectl apply -f {}", deployment_path.display());
    if hpa {
        println!(" kubectl apply -f {}", output.join("hpa.yaml").display());
    }
    Ok(())
}
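
/// Write a `fly.toml` into `output`; the app name defaults to
/// "realizar-model-server" when `--app` is not given.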
fn cmd_deploy_fly(
    model: &str,
    output: &Path,
    app: Option<&str>,
    region: &str,
) -> anyhow::Result<()> {
    let app_name = app.unwrap_or("realizar-model-server");
    println!("{}", "✈️ Generating Fly.io Deployment".bright_cyan().bold());
    println!();
    println!("{} Model: {}", "•".bright_blue(), model.cyan());
    println!("{} Output: {}", "•".bright_blue(), output.display());
    println!("{} App: {}", "•".bright_blue(), app_name.cyan());
    println!("{} Region: {}", "•".bright_blue(), region.cyan());
    println!();

    let fly_toml = format!(
        r#"# fly.toml - Fly.io configuration
# Generated by batuta deploy fly

app = "{app_name}"
primary_region = "{region}"

[env]
  MODEL_REF = "{model}"

[http_service]
  internal_port = 8080
  force_https = true
  auto_stop_machines = true
  auto_start_machines = true
  min_machines_running = 0

[[services]]
  internal_port = 8080
  protocol = "tcp"

  [[services.ports]]
    handlers = ["http"]
    port = 80

  [[services.ports]]
    handlers = ["tls", "http"]
    port = 443

  [[services.tcp_checks]]
    grace_period = "30s"
    interval = "15s"
    restart_limit = 0
    timeout = "2s"

  [[services.http_checks]]
    grace_period = "30s"
    interval = "10s"
    method = "get"
    path = "/health"
    protocol = "http"
    timeout = "2s"
"#
    );
    let fly_path = output.join("fly.toml");
    std::fs::write(&fly_path, fly_toml)?;
    println!("{} Generated: {}", "✓".bright_green(), fly_path.display());
    println!();
    println!("{}", "Deploy:".bright_yellow());
    println!(" fly launch");
    println!(" fly deploy");
    Ok(())
}
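
/// Write a `wrangler.toml` into `output`. Workers have limited compute, so
/// this configuration is best suited to proxying a Realizar server rather
/// than running inference at the edge (see the printed notes).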
fn cmd_deploy_cloudflare(model: &str, output: &Path, name: Option<&str>) -> anyhow::Result<()> {
    let worker_name = name.unwrap_or("realizar-worker");
    println!("{}", "☁️ Generating Cloudflare Workers Deployment".bright_cyan().bold());
    println!();
    println!("{} Model: {}", "•".bright_blue(), model.cyan());
    println!("{} Output: {}", "•".bright_blue(), output.display());
    println!("{} Worker name: {}", "•".bright_blue(), worker_name.cyan());
    println!();

    let wrangler_toml = format!(
        r#"# wrangler.toml - Cloudflare Workers configuration
# Generated by batuta deploy cloudflare

name = "{worker_name}"
main = "src/index.js"
compatibility_date = "2024-01-01"

[vars]
MODEL_REF = "{model}"

# Note: Cloudflare Workers have limited compute resources
# Consider using Cloudflare Pages with Functions for larger models
# or Cloudflare Workers with Durable Objects for persistent state
"#
    );
    let wrangler_path = output.join("wrangler.toml");
    std::fs::write(&wrangler_path, wrangler_toml)?;
    println!("{} Generated: {}", "✓".bright_green(), wrangler_path.display());
    println!();
    println!("{}", "Note:".bright_yellow());
    println!(" Cloudflare Workers have limited compute resources.");
    println!(" For ML inference, consider using:");
    println!(" - Cloudflare Workers AI (built-in LLM support)");
    println!(" - Edge proxy to Realizar server");
    println!();
    println!("{}", "Deploy:".bright_yellow());
    println!(" wrangler deploy");
    Ok(())
}