//! apr_cli/model_ops_commands.rs
//!
//! Model-operations subcommands (fine-tune, prune, distill) for the apr CLI.
2#[derive(Subcommand, Debug)]
3pub enum ModelOpsCommands {
4    /// Fine-tune model with LoRA/QLoRA (GH-244)
5    #[cfg(feature = "training")]
6    Finetune {
7        /// Input model file
8        #[arg(value_name = "FILE")]
9        file: Option<PathBuf>,
10        /// Fine-tuning method: auto, full, lora, qlora
11        #[arg(long, short = 'm', default_value = "auto")]
12        method: String,
13        /// LoRA rank (default: auto-selected)
14        #[arg(long, short = 'r')]
15        rank: Option<u32>,
16        /// Available VRAM in GB
17        #[arg(long, default_value = "16.0")]
18        vram: f64,
19        /// Plan mode (estimate only)
20        #[arg(long)]
21        plan: bool,
22        /// Training data file (JSONL format)
23        #[arg(long, short = 'd', value_name = "FILE")]
24        data: Option<PathBuf>,
25        /// Output path (adapter dir or merged model)
26        #[arg(short, long)]
27        output: Option<PathBuf>,
28        /// Adapter path for merge mode
29        #[arg(long)]
30        adapter: Option<PathBuf>,
31        /// Merge adapter into base model
32        #[arg(long)]
33        merge: bool,
34        /// Training epochs
35        #[arg(long, default_value = "3")]
36        epochs: u32,
37        /// Learning rate
38        #[arg(long, default_value = "0.0002")]
39        learning_rate: f64,
40        /// Model size for planning (e.g., "7B", "1.5B")
41        #[arg(long, value_name = "SIZE")]
42        model_size: Option<String>,
43        /// Fine-tuning task: classify (sequence classification)
44        #[arg(long)]
45        task: Option<String>,
46        /// Number of classes for classification task
47        #[arg(long, default_value = "5")]
48        num_classes: usize,
49        /// Output format for checkpoints: apr, safetensors, or both (comma-separated)
50        #[arg(long, value_name = "FORMAT", default_value = "apr,safetensors")]
51        checkpoint_format: String,
52        /// Oversample minority classes to match majority (for imbalanced datasets)
53        #[arg(long)]
54        oversample: bool,
55        /// Maximum sequence length for GPU buffer allocation (lower = less VRAM)
56        #[arg(long, value_name = "LEN")]
57        max_seq_len: Option<usize>,
58        /// Quantize frozen weights to NF4 (4-bit) for QLoRA training (~8x VRAM savings)
59        #[arg(long)]
60        quantize_nf4: bool,
61        /// GPU indices for data-parallel training (e.g., "0,1" for dual GPU)
62        #[arg(long, value_name = "INDICES")]
63        gpus: Option<String>,
64        /// GPU backend selection: auto, cuda, wgpu
65        #[arg(long, default_value = "auto")]
66        gpu_backend: String,
67        /// Distributed training role: coordinator or worker
68        #[arg(long, value_name = "ROLE")]
69        role: Option<String>,
70        /// Address to bind (coordinator) or connect to (worker)
71        #[arg(long, value_name = "ADDR")]
72        bind: Option<String>,
73        /// Coordinator address for worker nodes (e.g., "intel:9000")
74        #[arg(long, value_name = "ADDR")]
75        coordinator: Option<String>,
76        /// Expected number of workers (coordinator only)
77        #[arg(long, value_name = "N")]
78        expect_workers: Option<usize>,
79        /// Wait for VRAM availability before training (timeout in seconds, 0 = no wait)
80        #[arg(long, value_name = "SECS", default_value = "0")]
81        wait_gpu: u64,
82        /// Multi-adapter training: data:checkpoint pairs (GPU-SHARE Phase 2)
83        /// Format: --adapters data/corpus-a.jsonl:checkpoints/adapter-a
84        /// Can be specified multiple times for concurrent adapter training.
85        #[arg(long, value_name = "DATA:CHECKPOINT")]
86        adapters: Vec<String>,
87
88        /// Multi-adapter config file: TOML with [[adapter]] entries (GPU-SHARE §2.4)
89        #[arg(long, value_name = "FILE")]
90        adapters_config: Option<PathBuf>,
91
92        /// Enable experimental CUDA MPS for concurrent GPU sharing (GPU-SHARE §1.5).
93        /// WARNING: A GPU fault in any MPS client will crash ALL clients on that GPU.
94        #[arg(long)]
95        experimental_mps: bool,
96
97        /// MPS thread percentage (1-100). Controls SM allocation per process.
98        /// Only effective with --experimental-mps. Default: 50.
99        #[arg(long, value_name = "PCT", default_value = "50")]
100        gpu_share: u32,
101    },
102    /// Prune model (structured/unstructured pruning) (GH-247)
103    Prune {
104        /// Input model file
105        #[arg(value_name = "FILE")]
106        file: PathBuf,
107        /// Pruning method: magnitude, structured, depth, width, wanda, sparsegpt
108        #[arg(long, short = 'm', default_value = "magnitude")]
109        method: String,
110        /// Target pruning ratio (0-1)
111        #[arg(long, default_value = "0.5")]
112        target_ratio: f32,
113        /// Sparsity level (0-1)
114        #[arg(long, default_value = "0.0")]
115        sparsity: f32,
116        /// Output file path
117        #[arg(short, long)]
118        output: Option<PathBuf>,
119        /// Layers to remove for depth pruning (e.g., "20-24")
120        #[arg(long)]
121        remove_layers: Option<String>,
122        /// Analyze mode (identify pruning opportunities)
123        #[arg(long)]
124        analyze: bool,
125        /// Plan mode (estimate only)
126        #[arg(long)]
127        plan: bool,
128        /// Calibration data file
129        #[arg(long, value_name = "FILE")]
130        calibration: Option<PathBuf>,
131    },
132    /// Knowledge distillation (teacher -> student) (GH-247, ALB-011)
133    Distill {
134        /// Teacher model file (positional, for file-based mode)
135        #[arg(value_name = "TEACHER")]
136        teacher: Option<PathBuf>,
137        /// Student model file
138        #[arg(long, value_name = "FILE")]
139        student: Option<PathBuf>,
140        /// Training data file
141        #[arg(long, short = 'd', value_name = "FILE")]
142        data: Option<PathBuf>,
143        /// Output file path
144        #[arg(short, long)]
145        output: Option<PathBuf>,
146        /// Distillation strategy: standard, progressive, ensemble
147        #[arg(long, default_value = "standard")]
148        strategy: String,
149        /// Temperature for softmax scaling
150        #[arg(long, default_value = "3.0")]
151        temperature: f64,
152        /// Alpha weight for KL vs task loss
153        #[arg(long, default_value = "0.7")]
154        alpha: f64,
155        /// Training epochs
156        #[arg(long, default_value = "3")]
157        epochs: u32,
158        /// Plan mode (estimate only)
159        #[arg(long)]
160        plan: bool,
161        /// YAML config file for two-stage distillation (ALB-011)
162        #[arg(long, value_name = "FILE")]
163        config: Option<PathBuf>,
164        /// Distillation stage: precompute, train (logit KD), or generate (text-based, GH-455)
165        #[arg(long, value_name = "STAGE")]
166        stage: Option<String>,
167    },
168}