1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3use anyhow::Context;
4use crate::compiler::{
5 Compiler, optimizer::OptimizationLevel, loader::BinaryLoader,
6 bundle::Bundler,
7};
8use crate::server::{ServerConfig, start_server};
9mod project;
10mod workflow;
11mod tools;
12mod publish;
13mod config;
14use project::*;
15use workflow::*;
16use tools::*;
17use publish::*;
18use config::*;
// Top-level command-line interface for the `hlx` binary, parsed with
// clap's derive API. The binary name, version and about text come from
// the `#[command]` attributes.
// NOTE: plain `//` comments are used deliberately — `///` doc comments
// on clap-derived items would change the generated --help text.
#[derive(Parser)]
#[command(name = "hlx")]
#[command(version = env!("CARGO_PKG_VERSION"))]
#[command(about = "HELIX Compiler - Configuration without the pain")]
#[command(long_about = None)]
pub struct Cli {
    // Global flag: enables verbose output for every subcommand.
    #[arg(short, long, global = true)]
    verbose: bool,
    // The subcommand to execute; see `Commands`.
    #[command(subcommand)]
    command: Commands,
}
// Subcommands under `hlx workflow`: hot-reload/watch management and
// lifecycle control of individual workflows identified by id.
// (`//` comments on purpose — `///` would change clap's help text.)
#[derive(Subcommand)]
enum WorkflowAction {
    // Watch a directory and recompile on change.
    Watch {
        directory: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        // Optimization level (default 2), exposed as -O/--optimize.
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Start hot-reload for a directory.
    Start { directory: PathBuf, #[arg(short, long)] output: Option<PathBuf> },
    // Stop the hot-reload service.
    Stop,
    // Report workflow/hot-reload status.
    Status,
    // List known workflows.
    List,
    // Pause a running workflow by id.
    Pause { workflow_id: String },
    // Resume a paused workflow by id.
    Resume { workflow_id: String },
    // Forcefully stop a workflow by id.
    Kill { workflow_id: String },
}
47
// Subcommands under `hlx dataset`: processing, analysis, conversion,
// quality reporting and HuggingFace dataset download.
// (`//` comments on purpose — `///` would change clap's help text.)
#[derive(Subcommand)]
enum DatasetAction {
    // Process dataset files, optionally validating them.
    Process {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        format: Option<String>,
        #[arg(long)]
        algorithm: Option<String>,
        #[arg(long)]
        validate: bool,
    },
    // Analyze dataset files; --detailed expands the report.
    Analyze {
        files: Vec<PathBuf>,
        #[arg(long)]
        detailed: bool,
    },
    // Convert a dataset between two explicitly named formats.
    Convert {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        from_format: String,
        #[arg(long)]
        to_format: String,
    },
    // Assess dataset quality; --report produces a full report.
    Quality {
        files: Vec<PathBuf>,
        #[arg(long)]
        report: bool,
    },
    // Fetch a dataset from HuggingFace by name.
    Huggingface {
        dataset: String,
        #[arg(long)]
        split: Option<String>,
        #[arg(long)]
        output: Option<PathBuf>,
        #[arg(long)]
        cache_dir: Option<PathBuf>,
    },
}
90
// Subcommands under `hlx caption`: caption processing, e621-specific
// handling and format conversion.
// (`//` comments on purpose — `///` would change clap's help text.)
#[derive(Subcommand)]
enum CaptionAction {
    // Process caption files with an optional config file.
    Process {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        config: Option<PathBuf>,
    },
    // Process e621 caption files; --filter-tags strips unwanted tags.
    E621 {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        filter_tags: bool,
        #[arg(long)]
        format: Option<String>,
    },
    // Convert a caption file to another format.
    Convert {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        format: Option<String>,
    },
}
117
// Subcommands under `hlx json`: formatting, validation, metadata
// extraction, splitting and merging of JSON files.
// (`//` comments on purpose — `///` would change clap's help text.)
#[derive(Subcommand)]
enum JsonAction {
    // Reformat JSON files; --check only reports without rewriting.
    Format {
        files: Vec<PathBuf>,
        #[arg(long)]
        check: bool,
    },
    // Validate JSON files, optionally against a schema file.
    Validate {
        files: Vec<PathBuf>,
        #[arg(long)]
        schema: Option<PathBuf>,
    },
    // Extract metadata from JSON files.
    Metadata {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
    },
    // Split one JSON file into parts.
    Split {
        file: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
    },
    // Merge several JSON files into one output (output is required).
    Merge {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: PathBuf,
    },
}
// Top-level subcommands of the `hlx` CLI. Every variant maps 1:1 to a
// handler dispatched in `run()`; field attributes define the flags clap
// exposes. `Config`/`Cache` take a free-form `action` string that is
// `parse()`d downstream.
// (`//` comments on purpose — `///` would change clap's help text.)
#[derive(Subcommand)]
enum Commands {
    // Compile a .hlx source file into a .hlxb binary.
    Compile {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short, long)]
        compress: bool,
        // Optimization level (default 2), exposed as -O/--optimize.
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
        #[arg(long)]
        cache: bool,
    },
    // Turn a .hlxb binary back into .hlx source.
    Decompile { input: PathBuf, #[arg(short, long)] output: Option<PathBuf> },
    // Check a .hlx or .hlxb file for validity.
    Validate { file: PathBuf, #[arg(short, long)] detailed: bool },
    // Bundle a directory of sources into one binary archive.
    Bundle {
        directory: PathBuf,
        #[arg(short, long, default_value = "bundle.hlxb")]
        output: PathBuf,
        #[arg(short, long)]
        include: Vec<String>,
        #[arg(short = 'x', long)]
        exclude: Vec<String>,
        #[arg(long)]
        tree_shake: bool,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Print metadata about a compiled binary ("text", "json" or "yaml").
    Info {
        file: PathBuf,
        #[arg(short, long, default_value = "text")]
        format: String,
        #[arg(long)]
        symbols: bool,
        #[arg(long)]
        sections: bool,
    },
    // Watch a directory and recompile on change.
    Watch {
        directory: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Compare two compiled binaries.
    Diff { file1: PathBuf, file2: PathBuf, #[arg(short, long)] detailed: bool },
    // Re-optimize an existing binary (default level 3).
    Optimize {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "3")]
        level: u8,
    },
    // Scaffold a new project from an embedded template.
    Init {
        #[arg(short, long)]
        name: Option<String>,
        #[arg(short, long)]
        dir: Option<PathBuf>,
        #[arg(short, long, default_value = "minimal")]
        template: String,
        #[arg(short, long)]
        force: bool,
    },
    // Install the compiler binary (and, unless --local-only, a global link).
    Install {
        #[arg(long)]
        local_only: bool,
        #[arg(short, long)]
        force: bool,
        #[arg(short, long)]
        verbose: bool,
    },
    // Add a project dependency.
    Add {
        dependency: String,
        #[arg(short, long)]
        version: Option<String>,
        #[arg(long)]
        dev: bool,
    },
    // Remove a project dependency.
    Remove { dependency: String, #[arg(long)] dev: bool },
    // Clean build artifacts (and optionally caches).
    Clean { #[arg(long)] all: bool, #[arg(long)] cache: bool },
    // Reset the project to a pristine state.
    Reset { #[arg(short, long)] force: bool },
    // Build the current project (input defaults downstream to src/main.hlx).
    Build {
        input: Option<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
        #[arg(short, long)]
        compress: bool,
        #[arg(long)]
        cache: bool,
    },
    // Build and run the project, forwarding trailing args.
    Run {
        input: Option<PathBuf>,
        args: Vec<String>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Run project tests, optionally filtered by pattern.
    Test { #[arg(short, long)] pattern: Option<String>, #[arg(long)] integration: bool },
    // Run project benchmarks.
    Bench {
        #[arg(short, long)]
        pattern: Option<String>,
        #[arg(short, long)]
        iterations: Option<usize>,
    },
    // Serve configurations over HTTP (options override ServerConfig defaults).
    Serve {
        #[arg(short, long)]
        port: Option<u16>,
        #[arg(long)]
        domain: Option<String>,
        #[arg(short, long)]
        directory: Option<PathBuf>,
        #[arg(long)]
        no_convert: bool,
        #[arg(long)]
        cache_timeout: Option<u64>,
        #[arg(long)]
        max_file_size: Option<u64>,
    },
    // Format .hlx files; --check reports without rewriting.
    Fmt { files: Vec<PathBuf>, #[arg(long)] check: bool },
    // Lint .hlx files.
    Lint { files: Vec<PathBuf> },
    // Generate code from a named template.
    Generate {
        template: String,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short, long)]
        name: Option<String>,
        #[arg(short, long)]
        force: bool,
    },
    // Publish the project to a registry.
    Publish {
        #[arg(short, long)]
        registry: Option<String>,
        #[arg(short, long)]
        token: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
    // Sign a binary, or verify an existing signature with --verify.
    Sign {
        input: PathBuf,
        #[arg(short, long)]
        key: Option<String>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        verify: bool,
    },
    // Export the project in another format.
    Export {
        format: String,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        include_deps: bool,
    },
    // Import a project from another format.
    Import {
        input: PathBuf,
        #[arg(short, long)]
        format: Option<String>,
        #[arg(short, long)]
        force: bool,
    },
    // Get/set/list configuration values; `action` is parsed downstream.
    Config { action: String, key: Option<String>, value: Option<String> },
    // Manage the compiler cache; `action` is parsed downstream.
    Cache { action: String },
    // Run environment diagnostics.
    Doctor,
    // Serve the current project directory.
    ServeProject {
        #[arg(short, long)]
        port: Option<u16>,
        #[arg(long)]
        host: Option<String>,
        #[arg(short, long)]
        directory: Option<PathBuf>,
    },
    // Workflow management subcommands.
    Workflow { #[command(subcommand)] action: WorkflowAction },
    // Dataset processing subcommands.
    Dataset {
        #[command(subcommand)]
        action: DatasetAction,
    },
    // Concatenate caption/tag files in a directory using a preset.
    Concat {
        directory: PathBuf,
        #[arg(short, long, default_value = "caption+wd+tags")]
        preset: String,
        #[arg(short, long)]
        output_dir: Option<PathBuf>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long)]
        deduplicate: bool,
    },
    // Caption processing subcommands.
    Caption {
        #[command(subcommand)]
        action: CaptionAction,
    },
    // JSON utility subcommands.
    Json {
        #[command(subcommand)]
        action: JsonAction,
    },
}
/// Parse the command line and dispatch to the matching handler.
///
/// The global `--verbose` flag (`cli.verbose`) is threaded into every
/// handler; `Validate` and `Diff` additionally OR it into their own
/// `detailed` flag. Most handlers are synchronous; the dataset, caption
/// and json commands are `async` and awaited here. Returns whatever the
/// selected handler returns.
pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    match cli.command {
        Commands::Compile { input, output, compress, optimize, cache } => {
            compile_command(input, output, compress, optimize, cache, cli.verbose)
        }
        Commands::Decompile { input, output } => {
            decompile_command(input, output, cli.verbose)
        }
        // --verbose implies a detailed validation report.
        Commands::Validate { file, detailed } => {
            validate_command(file, detailed || cli.verbose)
        }
        Commands::Bundle {
            directory,
            output,
            include,
            exclude,
            tree_shake,
            optimize,
        } => {
            bundle_command(
                directory,
                output,
                include,
                exclude,
                tree_shake,
                optimize,
                cli.verbose,
            )
        }
        Commands::Info { file, format, symbols, sections } => {
            info_command(file, format, symbols, sections, cli.verbose)
        }
        Commands::Watch { directory, output, optimize } => {
            watch_command(directory, output, optimize, cli.verbose)
        }
        // --verbose implies a detailed diff.
        Commands::Diff { file1, file2, detailed } => {
            diff_command(file1, file2, detailed || cli.verbose)
        }
        Commands::Optimize { input, output, level } => {
            optimize_command(input, output, level, cli.verbose)
        }
        Commands::Init { name, dir, template, force } => {
            init_command(template, dir, name, force, cli.verbose)?;
            Ok(())
        }
        // Install honors either its own --verbose or the global one.
        Commands::Install { local_only, force, verbose } => {
            install_command(local_only, force, verbose || cli.verbose)
        }
        Commands::Add { dependency, version, dev } => {
            add_dependency(dependency, version, dev, cli.verbose)?;
            Ok(())
        }
        Commands::Remove { dependency, dev } => {
            remove_dependency(dependency, dev, cli.verbose)?;
            Ok(())
        }
        Commands::Clean { all, cache } => {
            clean_project(all, cache, cli.verbose)?;
            Ok(())
        }
        Commands::Reset { force } => {
            reset_project(force, cli.verbose)?;
            Ok(())
        }
        Commands::Build { input, output, optimize, compress, cache } => {
            build_project(input, output, optimize, compress, cache, cli.verbose)
        }
        Commands::Run { input, args, optimize } => {
            run_project(input, args, optimize, cli.verbose)?;
            Ok(())
        }
        Commands::Test { pattern, integration } => {
            run_tests(pattern, cli.verbose, integration)?;
            Ok(())
        }
        Commands::Bench { pattern, iterations } => {
            run_benchmarks(pattern, iterations, cli.verbose)?;
            Ok(())
        }
        // Serve: overlay any provided flags on top of the default config.
        Commands::Serve {
            port,
            domain,
            directory,
            no_convert,
            cache_timeout,
            max_file_size,
        } => {
            let mut config = ServerConfig::default();
            if let Some(p) = port {
                config.port = p;
            }
            if let Some(d) = domain {
                config.domain = d;
            }
            if let Some(dir) = directory {
                config.root_directory = dir;
            }
            config.auto_convert = !no_convert;
            if let Some(ct) = cache_timeout {
                config.cache_timeout = ct;
            }
            if let Some(mfs) = max_file_size {
                config.max_file_size = mfs;
            }
            config.verbose = cli.verbose;
            start_server(config)?;
            Ok(())
        }
        Commands::Fmt { files, check } => {
            format_files(files, check, cli.verbose)?;
            Ok(())
        }
        Commands::Lint { files } => {
            lint_files(files, cli.verbose)?;
            Ok(())
        }
        Commands::Generate { template, output, name, force } => {
            generate_code(template, output, name, force, cli.verbose)?;
            Ok(())
        }
        Commands::Publish { registry, token, dry_run } => {
            publish_project(registry, token, dry_run, cli.verbose)?;
            Ok(())
        }
        Commands::Sign { input, key, output, verify } => {
            sign_binary(input, key, output, verify, cli.verbose)?;
            Ok(())
        }
        Commands::Export { format, output, include_deps } => {
            export_project(format, output, include_deps, cli.verbose)?;
            Ok(())
        }
        Commands::Import { input, format, force } => {
            import_project(input, format, force, cli.verbose)?;
            Ok(())
        }
        // The free-form action strings are parsed into typed actions here.
        Commands::Config { action, key, value } => {
            manage_config(action.parse()?, key, value, cli.verbose)?;
            Ok(())
        }
        Commands::Cache { action } => {
            manage_cache(action.parse()?, cli.verbose)?;
            Ok(())
        }
        Commands::Doctor => {
            run_diagnostics(cli.verbose)?;
            Ok(())
        }
        Commands::ServeProject { port, host, directory } => {
            Ok(serve_project(port, host, directory, cli.verbose)?)
        }
        // Async handlers: awaited directly.
        Commands::Dataset { action } => {
            dataset_command(action, cli.verbose).await
        }
        Commands::Concat { directory, preset, output_dir, dry_run, deduplicate } => {
            concat_command(directory, preset, output_dir, dry_run, deduplicate, cli.verbose)
        }
        Commands::Caption { action } => {
            caption_command(action, cli.verbose).await
        }
        Commands::Json { action } => {
            json_command(action, cli.verbose).await
        }
        // Workflow subcommands fan out to the workflow module.
        Commands::Workflow { action } => {
            match action {
                WorkflowAction::Watch { directory, output, optimize } => {
                    watch_command(directory, output, optimize, cli.verbose)
                }
                WorkflowAction::Start { directory, output } => {
                    Ok(start_hot_reload(directory, output, cli.verbose)?)
                }
                WorkflowAction::Stop => Ok(stop_hot_reload(cli.verbose)?),
                WorkflowAction::Status => Ok(get_workflow_status(cli.verbose)?),
                WorkflowAction::List => Ok(list_workflows(cli.verbose)?),
                WorkflowAction::Pause { workflow_id } => {
                    Ok(pause_workflow(workflow_id, cli.verbose)?)
                }
                WorkflowAction::Resume { workflow_id } => {
                    Ok(resume_workflow(workflow_id, cli.verbose)?)
                }
                WorkflowAction::Kill { workflow_id } => {
                    Ok(stop_workflow(workflow_id, cli.verbose)?)
                }
            }
        }
    }
}
531fn compile_command(
532 input: PathBuf,
533 output: Option<PathBuf>,
534 compress: bool,
535 optimize: u8,
536 cache: bool,
537 verbose: bool,
538) -> Result<(), Box<dyn std::error::Error>> {
539 let output_path = output
540 .unwrap_or_else(|| {
541 let mut path = input.clone();
542 path.set_extension("hlxb");
543 path
544 });
545 if verbose {
546 println!("📦 Compiling: {}", input.display());
547 println!(" Optimization: Level {}", optimize);
548 println!(" Compression: {}", if compress { "Enabled" } else { "Disabled" });
549 println!(" Cache: {}", if cache { "Enabled" } else { "Disabled" });
550 }
551 let compiler = Compiler::builder()
552 .optimization_level(OptimizationLevel::from(optimize))
553 .compression(compress)
554 .cache(cache)
555 .verbose(verbose)
556 .build();
557 let binary = compiler.compile_file(&input)?;
558 let serializer = crate::compiler::serializer::BinarySerializer::new(compress);
559 serializer.write_to_file(&binary, &output_path)?;
560 println!("✅ Compiled successfully: {}", output_path.display());
561 println!(" Size: {} bytes", binary.size());
562 if verbose {
563 let stats = binary.symbol_table.stats();
564 println!(
565 " Strings: {} (unique: {})", stats.total_strings, stats.unique_strings
566 );
567 println!(" Agents: {}", stats.agents);
568 println!(" Workflows: {}", stats.workflows);
569 }
570 Ok(())
571}
572fn decompile_command(
573 input: PathBuf,
574 output: Option<PathBuf>,
575 verbose: bool,
576) -> Result<(), Box<dyn std::error::Error>> {
577 let output_path = output
578 .unwrap_or_else(|| {
579 let mut path = input.clone();
580 path.set_extension("hlx");
581 path
582 });
583 if verbose {
584 println!("🔄 Decompiling: {}", input.display());
585 }
586 let loader = BinaryLoader::new();
587 let binary = loader.load_file(&input)?;
588 let compiler = Compiler::new(OptimizationLevel::Zero);
589 let source = compiler.decompile(&binary)?;
590 std::fs::write(&output_path, source)?;
591 println!("✅ Decompiled successfully: {}", output_path.display());
592 Ok(())
593}
594fn validate_command(
595 file: PathBuf,
596 detailed: bool,
597) -> Result<(), Box<dyn std::error::Error>> {
598 let extension = file.extension().and_then(|s| s.to_str());
599 match extension {
600 Some("hlx") => {
601 let source = std::fs::read_to_string(&file)?;
602 let ast = crate::parse(&source)?;
603 crate::validate(&ast)?;
604 println!("✅ Valid HELIX file: {}", file.display());
605 if detailed {
606 println!(" Declarations: {}", ast.declarations.len());
607 }
608 }
609 Some("hlxb") => {
610 let loader = BinaryLoader::new();
611 let binary = loader.load_file(&file)?;
612 binary.validate()?;
613 println!("✅ Valid HLXB file: {}", file.display());
614 if detailed {
615 println!(" Version: {}", binary.version);
616 println!(" Sections: {}", binary.data_sections.len());
617 println!(" Checksum: {:x}", binary.checksum);
618 }
619 }
620 _ => {
621 return Err("Unknown file type (expected .hlx or .hlxb)".into());
622 }
623 }
624 Ok(())
625}
626fn bundle_command(
627 directory: PathBuf,
628 output: PathBuf,
629 include: Vec<String>,
630 exclude: Vec<String>,
631 tree_shake: bool,
632 optimize: u8,
633 verbose: bool,
634) -> Result<(), Box<dyn std::error::Error>> {
635 if verbose {
636 println!("📦 Bundling directory: {}", directory.display());
637 if !include.is_empty() {
638 println!(" Include patterns: {:?}", include);
639 }
640 if !exclude.is_empty() {
641 println!(" Exclude patterns: {:?}", exclude);
642 }
643 println!(" Tree shaking: {}", if tree_shake { "Enabled" } else { "Disabled" });
644 }
645 let mut bundler = Bundler::new().with_tree_shaking(tree_shake).verbose(verbose);
646 for pattern in include {
647 bundler = bundler.include(&pattern);
648 }
649 for pattern in exclude {
650 bundler = bundler.exclude(&pattern);
651 }
652 let binary = bundler
653 .bundle_directory(&directory, OptimizationLevel::from(optimize))?;
654 let serializer = crate::compiler::serializer::BinarySerializer::new(true);
655 serializer.write_to_file(&binary, &output)?;
656 println!("✅ Bundle created: {}", output.display());
657 println!(" Size: {} bytes", binary.size());
658 if let Some(file_count) = binary.metadata.extra.get("bundle_files") {
659 println!(" Files bundled: {}", file_count);
660 }
661 Ok(())
662}
663fn info_command(
664 file: PathBuf,
665 format: String,
666 symbols: bool,
667 sections: bool,
668 verbose: bool,
669) -> Result<(), Box<dyn std::error::Error>> {
670 let loader = BinaryLoader::new();
671 let binary = loader.load_file(&file)?;
672 match format.as_str() {
673 "json" => {
674 let json = serde_json::to_string_pretty(&binary.metadata)?;
675 println!("{}", json);
676 }
677 "yaml" => {
678 println!("YAML output not yet implemented");
679 }
680 "text" | _ => {
681 println!("HELIX Binary Information");
682 println!("=======================");
683 println!("File: {}", file.display());
684 println!("Version: {}", binary.version);
685 println!("Compiler: {}", binary.metadata.compiler_version);
686 println!("Platform: {}", binary.metadata.platform);
687 println!("Created: {}", binary.metadata.created_at);
688 println!("Optimization: Level {}", binary.metadata.optimization_level);
689 println!("Compressed: {}", binary.flags.compressed);
690 println!("Size: {} bytes", binary.size());
691 println!("Checksum: {:x}", binary.checksum);
692 if let Some(source) = &binary.metadata.source_path {
693 println!("Source: {}", source);
694 }
695 if symbols || verbose {
696 println!("\nSymbol Table:");
697 let stats = binary.symbol_table.stats();
698 println!(
699 " Strings: {} (unique: {})", stats.total_strings, stats
700 .unique_strings
701 );
702 println!(" Total bytes: {}", stats.total_bytes);
703 println!(" Agents: {}", stats.agents);
704 println!(" Workflows: {}", stats.workflows);
705 println!(" Contexts: {}", stats.contexts);
706 println!(" Crews: {}", stats.crews);
707 }
708 if sections || verbose {
709 println!("\nData Sections:");
710 for (i, section) in binary.data_sections.iter().enumerate() {
711 println!(" [{}] {:?}", i, section.section_type);
712 println!(" Size: {} bytes", section.size);
713 if let Some(compression) = §ion.compression {
714 println!(" Compression: {:?}", compression);
715 }
716 }
717 }
718 }
719 }
720 Ok(())
721}
/// Stub for watch mode: announces the target directory and reports that
/// watching is not yet implemented. The output/optimize/verbose
/// parameters are accepted for interface stability but unused.
fn watch_command(
    directory: PathBuf,
    _output: Option<PathBuf>,
    _optimize: u8,
    _verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    let banner = format!("👀 Watching directory: {}", directory.display());
    // Emit the three status lines in order, then return success.
    for line in &[
        banner.as_str(),
        " Press Ctrl+C to stop",
        "Watch mode not yet implemented",
    ] {
        println!("{}", line);
    }
    Ok(())
}
733fn diff_command(
734 file1: PathBuf,
735 file2: PathBuf,
736 detailed: bool,
737) -> Result<(), Box<dyn std::error::Error>> {
738 let loader = BinaryLoader::new();
739 let binary1 = loader.load_file(&file1)?;
740 let binary2 = loader.load_file(&file2)?;
741 println!("Comparing binaries:");
742 println!(" File 1: {}", file1.display());
743 println!(" File 2: {}", file2.display());
744 println!();
745 if binary1.version != binary2.version {
746 println!("⚠️ Version differs: {} vs {}", binary1.version, binary2.version);
747 }
748 if binary1.size() != binary2.size() {
749 println!("⚠️ Size differs: {} vs {} bytes", binary1.size(), binary2.size());
750 }
751 let stats1 = binary1.symbol_table.stats();
752 let stats2 = binary2.symbol_table.stats();
753 if stats1.total_strings != stats2.total_strings {
754 println!(
755 "⚠️ String count differs: {} vs {}", stats1.total_strings, stats2
756 .total_strings
757 );
758 }
759 if detailed {}
760 Ok(())
761}
762fn optimize_command(
763 input: PathBuf,
764 output: Option<PathBuf>,
765 level: u8,
766 verbose: bool,
767) -> Result<(), Box<dyn std::error::Error>> {
768 let output_path = output.unwrap_or_else(|| input.clone());
769 if verbose {
770 println!("⚡ Optimizing: {}", input.display());
771 println!(" Level: {}", level);
772 }
773 let loader = BinaryLoader::new();
774 let binary = loader.load_file(&input)?;
775 let serializer = crate::compiler::serializer::BinarySerializer::new(false);
776 let mut ir = serializer.deserialize_to_ir(&binary)?;
777 let mut optimizer = crate::compiler::optimizer::Optimizer::new(
778 OptimizationLevel::from(level),
779 );
780 optimizer.optimize(&mut ir);
781 let optimized_binary = serializer.serialize(ir, None)?;
782 serializer.write_to_file(&optimized_binary, &output_path)?;
783 println!("✅ Optimized successfully: {}", output_path.display());
784 if verbose {
785 let stats = optimizer.stats();
786 println!("\nOptimization Results:");
787 println!("{}", stats.report());
788 }
789 Ok(())
790}
/// Project templates compiled into the binary for `hlx init`.
///
/// Each entry is `(template_key, template_file_contents)`; the key is
/// matched against the `--template` flag and the contents are written
/// verbatim to the new `.hlx` file. Note the "ai-dev", "data-pipeline"
/// and "research" entries are placeholder one-liners here.
const EMBEDDED_TEMPLATES: &[(&str, &str)] = &[
    // Smallest valid configuration: one project, one agent, one workflow.
    ("minimal", r#"# Minimal MSO Configuration Example
# Demonstrates the simplest valid MSO file

project "minimal-example" {
 version = "0.1.0"
 author = "Example"
}

agent "simple-assistant" {
 model = "gpt-3.5-turbo"
 role = "Assistant"
 temperature = 0.7
}

workflow "basic-task" {
 trigger = "manual"

 step "process" {
 agent = "simple-assistant"
 task = "Process user request"
 timeout = 5m
 }
}"#),
    // Placeholder — full template content not yet embedded.
    ("ai-dev", "# AI Development Team template - full content embedded"),
    // Full customer-support example: two agents, a five-step workflow,
    // a crew, memory and a production context.
    ("support", r#"# Customer Support AI Configuration
# AI-powered customer service system

project "customer-support-system" {
 version = "2.0.0"
 author = "Support Team"
 description = "AI-driven customer support with multi-channel capabilities"
}

agent "support-specialist" {
 model = "claude-3-sonnet"
 role = "Customer Support Specialist"
 temperature = 0.7
 max_tokens = 100000

 capabilities [
 "customer-service"
 "problem-solving"
 "empathy"
 "multi-language"
 "escalation-handling"
 ]

 backstory {
 8 years in customer support leadership
 Handled 100K+ customer interactions
 Expert in de-escalation techniques
 Trained support teams worldwide
 }

 tools = [
 "zendesk"
 "intercom"
 "slack"
 "email-client"
 "knowledge-base"
 ]
}

agent "technical-expert" {
 model = "gpt-4"
 role = "Technical Support Engineer"
 temperature = 0.6
 max_tokens = 80000

 capabilities [
 "technical-troubleshooting"
 "bug-analysis"
 "system-diagnostics"
 "code-review"
 "api-debugging"
 ]

 backstory {
 12 years in software engineering
 Specialized in distributed systems
 Published technical documentation
 Led incident response teams
 }

 tools = [
 "terminal"
 "database-client"
 "monitoring-tools"
 "api-tester"
 "log-analyzer"
 ]
}

workflow "customer-inquiry-handling" {
 trigger = "webhook"

 step "triage" {
 agent = "support-specialist"
 task = "Analyze customer inquiry and determine priority level"
 timeout = 5m
 }

 step "initial-response" {
 agent = "support-specialist"
 task = "Provide immediate acknowledgment and gather more details"
 timeout = 10m
 depends_on = ["triage"]
 }

 step "technical-analysis" {
 agent = "technical-expert"
 task = "Investigate technical aspects of the issue"
 timeout = 15m
 depends_on = ["triage"]

 retry {
 max_attempts = 2
 delay = 2m
 backoff = "exponential"
 }
 }

 step "resolution" {
 crew = ["support-specialist", "technical-expert"]
 task = "Develop and implement solution"
 timeout = 30m
 depends_on = ["initial-response", "technical-analysis"]
 }

 step "follow-up" {
 agent = "support-specialist"
 task = "Ensure customer satisfaction and document resolution"
 timeout = 10m
 depends_on = ["resolution"]
 }

 pipeline {
 triage -> initial-response -> technical-analysis -> resolution -> follow-up
 }
}

crew "support-team" {
 agents [
 "support-specialist"
 "technical-expert"
 ]

 process = "hierarchical"
 manager = "technical-expert"
 max_iterations = 5
 verbose = true
}

memory {
 provider = "redis"
 connection = "redis://localhost:6379"

 embeddings {
 model = "text-embedding-ada-002"
 dimensions = 1536
 batch_size = 50
 }

 cache_size = 5000
 persistence = false
}

context "production" {
 environment = "prod"
 debug = false
 max_tokens = 150000

 secrets {
 zendesk_token = $ZENDESK_API_TOKEN
 intercom_token = $INTERCOM_API_TOKEN
 slack_token = $SLACK_API_TOKEN
 }

 variables {
 support_email = "support@company.com"
 response_timeout = 4h
 escalation_threshold = 24h
 max_concurrent_tickets = 50
 }
}"#),
    // Placeholder — full template content not yet embedded.
    ("data-pipeline", "# Data Pipeline template - full content embedded"),
    // Placeholder — full template content not yet embedded.
    ("research", "# Research Assistant template - full content embedded"),
];
980fn init_command(
981 template: String,
982 dir: Option<PathBuf>,
983 name: Option<String>,
984 force: bool,
985 verbose: bool,
986) -> Result<(), Box<dyn std::error::Error>> {
987 let template_content = EMBEDDED_TEMPLATES
988 .iter()
989 .find(|(t, _)| t == &template)
990 .map(|(_, content)| *content)
991 .ok_or_else(|| {
992 let available: Vec<&str> = EMBEDDED_TEMPLATES
993 .iter()
994 .map(|(name, _)| *name)
995 .collect();
996 format!(
997 "Unknown template '{}'. Available templates: {}", template, available
998 .join(", ")
999 )
1000 })?;
1001 let output_dir = dir
1002 .unwrap_or_else(|| {
1003 std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
1004 });
1005 let filename = name
1006 .unwrap_or_else(|| {
1007 match template.as_str() {
1008 "ai-dev" => "ai_development_team.hlx".to_string(),
1009 "data-pipeline" => "data_pipeline.hlx".to_string(),
1010 _ => format!("{}.hlx", template),
1011 }
1012 });
1013 let output_path = output_dir.join(&filename);
1014 if output_path.exists() && !force {
1015 return Err(anyhow::anyhow!(
1016 "File '{}' already exists. Use --force to overwrite.", output_path
1017 .display()
1018 ).into());
1019 }
1020 if verbose {
1021 println!("🚀 Initializing HELIX project:");
1022 println!(" Template: {}", template);
1023 println!(" Output: {}", output_path.display());
1024 println!(" Force: {}", force);
1025 }
1026 if let Some(parent) = output_path.parent() {
1027 std::fs::create_dir_all(parent)?;
1028 }
1029 std::fs::write(&output_path, template_content)?;
1030 println!("✅ HELIX project initialized successfully!");
1031 println!(" Created: {}", output_path.display());
1032 println!(" Template: {}", template);
1033 if verbose {
1034 let content_size = template_content.len();
1035 println!(" Size: {} bytes", content_size);
1036 let description = match template.as_str() {
1037 "minimal" => "Simple hlx configuration with basic agent and workflow",
1038 "ai-dev" => {
1039 "Complete AI development team with specialized agents for full-stack development"
1040 }
1041 "support" => {
1042 "Multi-tier customer support system with escalation and knowledge management"
1043 }
1044 "data-pipeline" => {
1045 "High-throughput data processing pipeline with ML integration"
1046 }
1047 "research" => {
1048 "AI-powered research assistant for literature review and paper writing"
1049 }
1050 _ => "HELIX configuration template",
1051 };
1052 println!(" Description: {}", description);
1053 }
1054 println!("\n📋 Next steps:");
1055 println!(" 1. Review and customize the configuration");
1056 println!(" 2. Set up your API keys and environment variables");
1057 println!(" 3. Compile with: helix compile {}", filename);
1058 println!(" 4. Run with your hlx runtime");
1059 Ok(())
1060}
/// Install the currently running `hlx` binary into `~/.baton/bin` and,
/// unless `local_only` is set, try to symlink it into a well-known
/// global bin directory.
///
/// `force` overwrites an existing installation or symlink; `verbose`
/// prints each step. Fails if `HOME` is unset or a file operation fails.
fn install_command(
    local_only: bool,
    force: bool,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    if verbose {
        println!("🔧 Installing Helix compiler globally...");
    }
    // The binary we install is the one currently executing.
    let current_exe = std::env::current_exe()
        .map_err(|e| format!("Failed to get current executable path: {}", e))?;
    if verbose {
        println!(" Source: {}", current_exe.display());
    }
    let home_dir = std::env::var("HOME")
        .map_err(|e| format!("Failed to get HOME directory: {}", e))?;
    let baton_dir = PathBuf::from(&home_dir).join(".baton");
    let baton_bin_dir = baton_dir.join("bin");
    let target_binary = baton_bin_dir.join("hlx");
    if verbose {
        println!(" Target: {}", target_binary.display());
    }
    std::fs::create_dir_all(&baton_bin_dir)
        .map_err(|e| {
            format!("Failed to create directory {}: {}", baton_bin_dir.display(), e)
        })?;
    if verbose {
        println!(" ✅ Created directory: {}", baton_bin_dir.display());
    }
    if target_binary.exists() && !force {
        return Err(
            format!(
                "HELIX compiler already installed at {}. Use --force to overwrite.",
                target_binary.display()
            )
            .into(),
        );
    }
    // BUG FIX: was `¤t_exe` — mojibake for the HTML entity
    // `&curren;t_exe` — i.e. a corrupted `&current_exe`, which does not
    // compile.
    std::fs::copy(&current_exe, &target_binary)
        .map_err(|e| {
            format!("Failed to copy binary to {}: {}", target_binary.display(), e)
        })?;
    // Installed binaries must be executable on Unix.
    #[cfg(unix)]
    {
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(&target_binary)?.permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&target_binary, perms)?;
    }
    if verbose {
        println!(" ✅ Copied binary to: {}", target_binary.display());
    }
    println!("✅ Helix compiler installed successfully!");
    println!(" Location: {}", target_binary.display());
    // Local-only installs stop here; the user adds the dir to PATH.
    if local_only {
        println!("\n📋 Local installation complete!");
        println!(" Add {} to your PATH to use 'hlx' command", baton_bin_dir.display());
        println!(" Or run: export PATH=\"{}:$PATH\"", baton_bin_dir.display());
        return Ok(());
    }
    // Candidate global bin directories, tried in order; the first one
    // where a link succeeds wins.
    let global_bin_paths = vec![
        PathBuf::from("/usr/local/bin"), PathBuf::from("/usr/bin"),
        PathBuf::from("/opt/homebrew/bin"),
        PathBuf::from("/home/linuxbrew/.linuxbrew/bin"),
    ];
    let mut symlink_created = false;
    for global_bin in global_bin_paths {
        if global_bin.exists() && global_bin.is_dir() {
            let symlink_path = global_bin.join("hlx");
            if symlink_path.exists() && !force {
                if verbose {
                    println!(
                        " ⚠️ Symlink already exists: {}", symlink_path.display()
                    );
                }
                continue;
            }
            if symlink_path.exists() {
                std::fs::remove_file(&symlink_path)
                    .map_err(|e| {
                        format!(
                            "Failed to remove existing symlink {}: {}", symlink_path
                            .display(), e
                        )
                    })?;
            }
            // Platform-specific linking: real symlink on Unix, copy
            // (falling back to a file symlink) on Windows, plain copy
            // elsewhere.
            #[cfg(unix)]
            let symlink_result = std::os::unix::fs::symlink(&target_binary, &symlink_path);

            #[cfg(windows)]
            let symlink_result = {
                std::fs::copy(&target_binary, &symlink_path)
                    .map(|_| ())
                    .or_else(|_| std::os::windows::fs::symlink_file(&target_binary, &symlink_path))
            };

            #[cfg(not(any(unix, windows)))]
            let symlink_result = std::fs::copy(&target_binary, &symlink_path).map(|_| ());

            match symlink_result {
                Ok(_) => {
                    println!(" ✅ Created global link: {}", symlink_path.display());
                    symlink_created = true;
                    break;
                }
                Err(e) => {
                    if verbose {
                        println!(
                            " ⚠️ Failed to create link at {}: {}", symlink_path
                            .display(), e
                        );
                    }
                    continue;
                }
            }
        }
    }
    if symlink_created {
        println!("\n🎉 Global installation complete!");
        println!(" You can now use 'hlx' command from anywhere");
        println!(" Try: hlx --help");
    } else {
        println!("\n📋 Installation complete, but global symlink creation failed");
        println!(" This might be due to insufficient permissions");
        println!(
            " You can still use hlx by adding {} to your PATH", baton_bin_dir.display()
        );
        println!(" Or run: export PATH=\"{}:$PATH\"", baton_bin_dir.display());
        if verbose {
            println!("\n💡 To create global symlink manually:");
            println!(" sudo ln -sf {} /usr/local/bin/hlx", target_binary.display());
        }
    }
    Ok(())
}
1196fn build_project(
1197 input: Option<PathBuf>,
1198 output: Option<PathBuf>,
1199 optimize: u8,
1200 compress: bool,
1201 cache: bool,
1202 verbose: bool,
1203) -> Result<(), Box<dyn std::error::Error>> {
1204 let project_dir = find_project_root()?;
1205 let input_file = match input {
1206 Some(path) => path,
1207 None => {
1208 let main_file = project_dir.join("src").join("main.hlx");
1209 if main_file.exists() {
1210 main_file
1211 } else {
1212 return Err(
1213 anyhow::anyhow!(
1214 "No input file specified and no src/main.hlx found.\n\
1215 Specify a file with: helix build <file.hlx>"
1216 )
1217 .into(),
1218 );
1219 }
1220 }
1221 };
1222 let output_file = output
1223 .unwrap_or_else(|| {
1224 let target_dir = project_dir.join("target");
1225 let input_stem = input_file
1226 .file_stem()
1227 .and_then(|s| s.to_str())
1228 .unwrap_or("output");
1229 target_dir.join(format!("{}.hlxb", input_stem))
1230 });
1231 if verbose {
1232 println!("🔨 Building HELIX project:");
1233 println!(" Input: {}", input_file.display());
1234 println!(" Output: {}", output_file.display());
1235 println!(" Optimization: Level {}", optimize);
1236 println!(" Compression: {}", if compress { "Enabled" } else { "Disabled" });
1237 println!(" Cache: {}", if cache { "Enabled" } else { "Disabled" });
1238 }
1239 if let Some(parent) = output_file.parent() {
1240 std::fs::create_dir_all(parent)?;
1241 }
1242 let compiler = Compiler::builder()
1243 .optimization_level(OptimizationLevel::from(optimize))
1244 .compression(compress)
1245 .cache(cache)
1246 .verbose(verbose)
1247 .build();
1248 let binary = compiler.compile_file(&input_file)?;
1249 let serializer = crate::compiler::serializer::BinarySerializer::new(compress);
1250 serializer.write_to_file(&binary, &output_file)?;
1251 println!("✅ Build completed successfully!");
1252 println!(" Output: {}", output_file.display());
1253 println!(" Size: {} bytes", binary.size());
1254 if verbose {
1255 let stats = binary.symbol_table.stats();
1256 println!(
1257 " Strings: {} (unique: {})", stats.total_strings, stats.unique_strings
1258 );
1259 println!(" Agents: {}", stats.agents);
1260 println!(" Workflows: {}", stats.workflows);
1261 }
1262 Ok(())
1263}
1264fn find_project_root() -> Result<PathBuf, Box<dyn std::error::Error>> {
1265 let mut current_dir = std::env::current_dir()
1266 .context("Failed to get current directory")?;
1267 loop {
1268 let manifest_path = current_dir.join("project.hlx");
1269 if manifest_path.exists() {
1270 return Ok(current_dir);
1271 }
1272 if let Some(parent) = current_dir.parent() {
1273 current_dir = parent.to_path_buf();
1274 } else {
1275 break;
1276 }
1277 }
1278 Err(anyhow::anyhow!("No HELIX project found. Run 'helix init' first.").into())
1279}
/// Dispatch a `dataset` subcommand: process, analyze, convert, assess
/// quality, or pull a HuggingFace dataset.
///
/// `verbose` adds per-file progress and parameter echo output. Load and
/// conversion failures inside the per-file loops abort the whole command
/// via `?`; algorithm-format failures in `Process` are reported but do not
/// abort.
async fn dataset_command(
    action: DatasetAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        // Load each dataset, optionally run a quality check, and optionally
        // attempt an algorithm-specific format conversion.
        // NOTE(review): `output` and `format` are only echoed in verbose
        // mode here — presumably placeholders; confirm intended use.
        DatasetAction::Process { files, output, format, algorithm, validate } => {
            if verbose {
                println!("🧠 Processing datasets with HLX-AI...");
                println!(" Files: {:?}", files);
                println!(" Output: {:?}", output);
                println!(" Format: {:?}", format);
                println!(" Algorithm: {:?}", algorithm);
                println!(" Validate: {}", validate);
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in &files {
                if verbose {
                    println!("📊 Processing: {}", file.display());
                }

                // Format auto-detection; a load failure aborts the command.
                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                let training_dataset = dataset.to_training_dataset()
                    .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                // Optional quality pass: print overall score plus any issues.
                if validate {
                    let quality = training_dataset.quality_assessment();
                    println!("✅ Quality Score: {:.2}", quality.overall_score);
                    if !quality.issues.is_empty() {
                        println!("⚠️ Issues:");
                        for issue in &quality.issues {
                            println!(" - {}", issue);
                        }
                    }
                }

                // Optional algorithm conversion; the converted value is
                // discarded — only success/failure is reported.
                if let Some(algo) = &algorithm {
                    if training_dataset.to_algorithm_format(algo).is_ok() {
                        println!("✅ Converted to {} format", algo.to_uppercase());
                    } else {
                        println!("❌ Failed to convert to {} format", algo.to_uppercase());
                    }
                }

                println!("📈 Dataset stats: {} samples", training_dataset.samples.len());
            }

            println!("🎉 Dataset processing completed!");
            Ok(())
        }
        // Print per-file statistics; `detailed` adds training-format and
        // field-coverage breakdowns.
        DatasetAction::Analyze { files, detailed } => {
            if verbose {
                println!("🔍 Analyzing datasets...");
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                if verbose {
                    println!("📊 Analyzing: {}", file.display());
                }

                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                println!("\n--- Dataset Analysis: {} ---", file.display());
                for (key, value) in dataset.stats() {
                    println!("{:15}: {}", key, value);
                }

                if detailed {
                    let training_dataset = dataset.to_training_dataset()
                        .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                    println!("\n--- Training Format Analysis ---");
                    println!("Format: {:?}", training_dataset.format);
                    println!("Samples: {}", training_dataset.samples.len());
                    println!("Avg Prompt Length: {:.1}", training_dataset.statistics.avg_prompt_length);
                    println!("Avg Completion Length: {:.1}", training_dataset.statistics.avg_completion_length);

                    println!("\n--- Field Coverage ---");
                    // field_coverage values are fractions; rendered as %.
                    for (field, coverage) in &training_dataset.statistics.field_coverage {
                        println!("{:12}: {:.1}%", field, coverage * 100.0);
                    }
                }
            }

            Ok(())
        }
        // Placeholder: announces the conversion but performs no work yet
        // (note `output` is deliberately ignored via the `_output` binding).
        DatasetAction::Convert { input, output: _output, from_format, to_format } => {
            if verbose {
                println!("🔄 Converting dataset format...");
                println!(" Input: {}", input.display());
                println!(" From: {}", from_format);
                println!(" To: {}", to_format);
            }

            println!("🔄 Format conversion: {} → {}", from_format, to_format);
            println!("✅ Conversion completed (placeholder)");
            Ok(())
        }
        // Quality assessment per file: a full report with issues and
        // recommendations, or a one-line score when `report` is off.
        DatasetAction::Quality { files, report } => {
            if verbose {
                println!("📊 Assessing dataset quality...");
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                let training_dataset = dataset.to_training_dataset()
                    .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                let quality = training_dataset.quality_assessment();

                if report {
                    println!("\n=== Quality Report: {} ===", file.display());
                    println!("Overall Score: {:.2}/1.0", quality.overall_score);
                    println!("\nIssues:");
                    if quality.issues.is_empty() {
                        println!(" ✅ No issues found");
                    } else {
                        for issue in &quality.issues {
                            println!(" ⚠️ {}", issue);
                        }
                    }
                    println!("\nRecommendations:");
                    for rec in &quality.recommendations {
                        println!(" 💡 {}", rec);
                    }
                } else {
                    println!("📊 {}: Quality Score {:.2}", file.display(), quality.overall_score);
                }
            }

            Ok(())
        }
        // Fetch and process a HuggingFace dataset; optionally dump the
        // resulting samples as pretty-printed JSON. Defaults: split "train",
        // cache directory "./hf_cache".
        DatasetAction::Huggingface { dataset, split, output, cache_dir } => {
            if verbose {
                println!("🤗 Loading HuggingFace dataset...");
                println!(" Dataset: {}", dataset);
                println!(" Split: {:?}", split.as_ref().unwrap_or(&"train".to_string()));
                println!(" Cache: {:?}", cache_dir);
                println!(" Output: {:?}", output);
            }

            let processor = crate::json::HfProcessor::new(cache_dir.unwrap_or_else(|| PathBuf::from("./hf_cache")));

            let config = crate::json::HfDatasetConfig {
                source: dataset.clone(),
                split: split.unwrap_or_else(|| "train".to_string()),
                format: None,
                rpl_filter: None,
                revision: None,
                streaming: false,
                trust_remote_code: false,
                num_proc: None,
            };

            match processor.process_dataset(&dataset, &config).await {
                Ok(training_dataset) => {
                    println!("✅ HuggingFace dataset loaded successfully");
                    println!("📊 Samples: {}", training_dataset.samples.len());
                    println!("📝 Format: {:?}", training_dataset.format);

                    // Only the samples are persisted, not the full metadata.
                    if let Some(output_path) = output {
                        let json_output = serde_json::to_string_pretty(&training_dataset.samples)
                            .map_err(|e| format!("Failed to serialize output: {}", e))?;
                        std::fs::write(&output_path, json_output)
                            .map_err(|e| format!("Failed to write output file {}: {}", output_path.display(), e))?;
                        println!("💾 Saved processed dataset to: {}", output_path.display());
                    }
                }
                // Unlike other arms, a fetch failure here is a hard error.
                Err(e) => {
                    println!("❌ Failed to load HuggingFace dataset: {}", e);
                    return Err(e.into());
                }
            }

            Ok(())
        }
    }
}
1474
1475fn concat_command(
1476 directory: PathBuf,
1477 preset: String,
1478 output_dir: Option<PathBuf>,
1479 dry_run: bool,
1480 deduplicate: bool,
1481 verbose: bool,
1482) -> Result<(), Box<dyn std::error::Error>> {
1483 if verbose {
1484 println!("🔗 Concatenating files...");
1485 println!(" Directory: {}", directory.display());
1486 println!(" Preset: {}", preset);
1487 println!(" Output: {:?}", output_dir);
1488 println!(" Dry Run: {}", dry_run);
1489 println!(" Deduplicate: {}", deduplicate);
1490 }
1491
1492 use crate::json::concat::{ConcatConfig, FileExtensionPreset};
1493
1494 let config = match preset.as_str() {
1495 "caption+wd+tags" => ConcatConfig::from_preset(FileExtensionPreset::CaptionWdTags),
1496 "florence+wd+tags" => ConcatConfig::from_preset(FileExtensionPreset::FlorenceWdTags),
1497 _ => {
1498 return Err(format!("Unknown preset: {}. Use 'caption+wd+tags' or 'florence+wd+tags'", preset).into());
1499 }
1500 };
1501
1502 let _config = if deduplicate {
1503 config.with_deduplication(true)
1504 } else {
1505 config
1506 };
1507
1508 println!("🔄 Concatenating files in: {}", directory.display());
1510 println!("📝 Using preset: {}", preset);
1511
1512 if dry_run {
1513 println!("🔍 Dry run mode - no files will be modified");
1514 }
1515
1516 println!("✅ Concatenation completed (placeholder)");
1517 Ok(())
1518}
1519
/// Dispatch a `caption` subcommand: generic caption processing, E621-style
/// JSON processing, or (placeholder) format conversion.
///
/// Per-file failures in the processing loops are printed and skipped, so a
/// bad file does not abort the batch; only filesystem errors while
/// creating output directories propagate via `?`.
async fn caption_command(
    action: CaptionAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        // Run the generic caption processor over each file.
        // NOTE(review): `output` and `config` are only echoed in verbose
        // mode — presumably not wired up yet; confirm intended behavior.
        CaptionAction::Process { files, output, config } => {
            if verbose {
                println!("📝 Processing caption files...");
                println!(" Files: {:?}", files);
                println!(" Output: {:?}", output);
                println!(" Config: {:?}", config);
            }


            for file in files {
                if verbose {
                    println!("🎨 Processing: {}", file.display());
                }

                // Failures are reported per-file, not propagated.
                match crate::json::caption::process_file(&file).await {
                    Ok(_) => println!("✅ Processed: {}", file.display()),
                    Err(e) => println!("❌ Failed to process {}: {}", file.display(), e),
                }
            }

            Ok(())
        }
        // Process E621 JSON files in place, then optionally copy each
        // successfully processed file into `output`.
        CaptionAction::E621 { files, output, filter_tags, format } => {
            if verbose {
                println!("🔞 Processing E621 captions...");
                println!(" Filter tags: {}", filter_tags);
                println!(" Format: {:?}", format);
                println!(" Output: {:?}", output);
            }

            use crate::json::caption::{E621Config, process_e621_json_file};

            // One shared config, cloned per file below.
            let config = E621Config::new()
                .with_filter_tags(filter_tags)
                .with_format(format);

            for file in files {
                if verbose {
                    println!("🎨 Processing E621: {}", file.display());
                }

                match process_e621_json_file(&file, Some(config.clone())).await {
                    Ok(_) => {
                        println!("✅ Processed E621 file: {}", file.display());
                        // Copy happens only after successful processing;
                        // the source file (already processed in place) is
                        // what gets copied to the output directory.
                        if let Some(output_path) = &output {
                            let file_name = file.file_name().unwrap_or_default();
                            let target_path = output_path.join(file_name);
                            if let Some(parent) = target_path.parent() {
                                std::fs::create_dir_all(parent)?;
                            }
                            // Copy failure is a warning, not a hard error.
                            match std::fs::copy(&file, &target_path) {
                                Ok(_) => println!("💾 Saved processed file to: {}", target_path.display()),
                                Err(e) => println!("⚠️ Failed to save to output: {}", e),
                            }
                        }
                    }
                    Err(e) => println!("❌ Failed to process E621 file {}: {}", file.display(), e),
                }
            }

            Ok(())
        }
        // Placeholder: announces the conversion but performs no work yet.
        CaptionAction::Convert { input, output, format } => {
            if verbose {
                println!("🔄 Converting caption format...");
                println!(" Input: {}", input.display());
                println!(" Output: {:?}", output);
                println!(" Format: {:?}", format);
            }

            println!("🔄 Converting caption format (placeholder)");
            println!("✅ Conversion completed");
            Ok(())
        }
    }
}
1604
/// Dispatch a `json` subcommand: format, validate, extract metadata,
/// split, or merge JSON files.
///
/// Per-file failures are printed and skipped in most arms; `Split`
/// propagates read/serialize/write errors via `?`.
async fn json_command(
    action: JsonAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        // Format files in place, or (with `check`) only report status.
        // NOTE(review): both branches call the same `format_json_file`
        // helper — whether `check` avoids rewriting the file depends on
        // that helper; only the printed wording differs here. Confirm.
        JsonAction::Format { files, check } => {
            if verbose {
                println!("🎨 Formatting JSON files...");
                println!(" Check only: {}", check);
            }

            use crate::json::format_json_file;

            for file in files {
                if verbose {
                    println!("📝 Formatting: {}", file.display());
                }

                if check {
                    match format_json_file(file.clone()).await {
                        Ok(_) => println!("✅ {} is properly formatted", file.display()),
                        Err(e) => println!("❌ {} needs formatting: {}", file.display(), e),
                    }
                } else {
                    match format_json_file(file.clone()).await {
                        Ok(_) => println!("✅ Formatted: {}", file.display()),
                        Err(e) => println!("❌ Failed to format {}: {}", file.display(), e),
                    }
                }
            }

            Ok(())
        }
        // Validate each file by attempting to load it; an optional schema
        // directory is forwarded to the loader.
        JsonAction::Validate { files, schema } => {
            if verbose {
                println!("✅ Validating JSON files...");
                println!(" Schema: {:?}", schema);
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                if verbose {
                    println!("🔍 Validating: {}", file.display());
                }

                match GenericJSONDataset::new(&[file.clone()], schema.as_deref(), DataFormat::Auto) {
                    Ok(dataset) => {
                        println!("✅ {} is valid JSON", file.display());
                        if verbose {
                            println!(" Samples: {}", dataset.len());
                            println!(" Format: {:?}", dataset.format);
                        }
                    }
                    Err(e) => println!("❌ {} validation failed: {}", file.display(), e),
                }
            }

            Ok(())
        }
        // Extract metadata from SafeTensors files only; everything else is
        // skipped with a warning.
        // NOTE(review): `output` is only echoed in verbose mode — confirm
        // whether extracted metadata should be written there.
        JsonAction::Metadata { files, output } => {
            if verbose {
                println!("📊 Extracting JSON metadata...");
                println!(" Output: {:?}", output);
            }

            use crate::json::process_safetensors_file;

            for file in files {
                // Dispatch on the file extension, not on file contents.
                if file.extension().and_then(|s| s.to_str()) == Some("safetensors") {
                    if verbose {
                        println!("🔍 Processing SafeTensors: {}", file.display());
                    }

                    match process_safetensors_file(&file).await {
                        Ok(_) => println!("✅ Metadata extracted from: {}", file.display()),
                        Err(e) => println!("❌ Failed to extract metadata from {}: {}", file.display(), e),
                    }
                } else {
                    println!("⚠️ Skipping non-SafeTensors file: {}", file.display());
                }
            }

            Ok(())
        }
        // Split one file's content into tags and sentences; optionally
        // persist both lists as a JSON object.
        JsonAction::Split { file, output } => {
            if verbose {
                println!("✂️ Splitting JSON file...");
                println!(" Input: {}", file.display());
                println!(" Output: {:?}", output);
            }

            use crate::json::split_content;

            let content = tokio::fs::read_to_string(&file).await?;
            let (tags, sentences) = split_content(&content);
            println!("✅ Split {}: {} tags, {} sentences", file.display(), tags.len(), sentences.len());

            if let Some(output_path) = output {
                let split_data = serde_json::json!({
                    "tags": tags,
                    "sentences": sentences
                });
                let json_output = serde_json::to_string_pretty(&split_data)
                    .map_err(|e| format!("Failed to serialize split data: {}", e))?;
                std::fs::write(&output_path, json_output)
                    .map_err(|e| format!("Failed to write split output to {}: {}", output_path.display(), e))?;
                println!("💾 Saved split data to: {}", output_path.display());
            }
            Ok(())
        }
        // Merge multiple files into one output via the shared json runner.
        JsonAction::Merge { files, output } => {
            if verbose {
                println!("🔗 Merging JSON files...");
                println!(" Output: {}", output.display());
            }

            use crate::json::core::{run_json_cmd, JsonArgs};

            // Only `file`, `merge_output`, and `show_stats` vary here; the
            // remaining fields are fixed defaults for the merge use case.
            let args = JsonArgs {
                data_dir: vec![],
                file: files.into_iter().map(|p| p.to_string_lossy().to_string()).collect(),
                schema_dir: None,
                format: crate::json::core::DataFormat::Auto,
                merge_output: Some(output),
                show_stats: verbose,
                seed: 42,
                multi_process: false,
                input_folder: None,
                output: None,
                jobs: num_cpus::get(),
            };

            // Merge failure is reported but not propagated as an error.
            match run_json_cmd(args).await {
                Ok(_) => println!("✅ Successfully merged JSON files"),
                Err(e) => println!("❌ Failed to merge JSON files: {}", e),
            }
            Ok(())
        }
    }
}
1752
#[cfg(test)]
mod tests {
    use super::*;

    /// Smoke test: a representative `compile` invocation with optimization
    /// and compression flags must parse without error.
    #[test]
    fn test_cli_parsing() {
        let args = vec!["helix", "compile", "test.hlx", "-O3", "--compress"];
        assert!(Cli::try_parse_from(args).is_ok());
    }
}