helix_core/compiler/
cli.rs

1use clap::{Parser, Subcommand};
2use std::path::PathBuf;
3use anyhow::Context;
4use crate::compiler::{
5    Compiler, optimizer::OptimizationLevel, loader::BinaryLoader,
6    bundle::Bundler,
7};
8use crate::server::{ServerConfig, start_server};
9mod project;
10mod workflow;
11mod tools;
12mod publish;
13mod config;
14use project::*;
15use workflow::*;
16use tools::*;
17use publish::*;
18use config::*;
// Top-level command-line interface for the `hlx` binary.
// NOTE: `//` comments (not `///`) are used throughout these clap derives on
// purpose — clap turns doc comments into --help text, which would change the
// program's runtime output.
#[derive(Parser)]
#[command(name = "hlx")]
#[command(version = env!("CARGO_PKG_VERSION"))]
#[command(about = "HELIX Compiler - Configuration without the pain")]
#[command(long_about = None)]
pub struct Cli {
    // Global flag: available on every subcommand; handlers receive it
    // (sometimes OR-ed with a per-command verbose flag).
    #[arg(short, long, global = true)]
    verbose: bool,
    // The selected subcommand; see `Commands` for the full list.
    #[command(subcommand)]
    command: Commands,
}
// Sub-actions of `hlx workflow`. `//` comments only (doc comments would
// become clap help text).
#[derive(Subcommand)]
enum WorkflowAction {
    // Watch a directory and recompile on change (delegates to watch_command).
    Watch {
        directory: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Start / stop the hot-reload session for a directory.
    Start { directory: PathBuf, #[arg(short, long)] output: Option<PathBuf> },
    Stop,
    // Inspect running workflows.
    Status,
    List,
    // Control an individual workflow by its id.
    Pause { workflow_id: String },
    Resume { workflow_id: String },
    Kill { workflow_id: String },
}
47
// Sub-actions of `hlx dataset` (HLX-AI dataset processing). `//` comments
// only (doc comments would become clap help text).
#[derive(Subcommand)]
enum DatasetAction {
    // Process one or more dataset files with an optional output format,
    // algorithm, and validation pass.
    Process {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        format: Option<String>,
        #[arg(long)]
        algorithm: Option<String>,
        #[arg(long)]
        validate: bool,
    },
    // Analyze dataset files; --detailed expands the report.
    Analyze {
        files: Vec<PathBuf>,
        #[arg(long)]
        detailed: bool,
    },
    // Convert a single file between two explicitly named formats.
    Convert {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        from_format: String,
        #[arg(long)]
        to_format: String,
    },
    // Run quality checks; --report emits a report.
    Quality {
        files: Vec<PathBuf>,
        #[arg(long)]
        report: bool,
    },
    // Fetch a HuggingFace dataset (optionally a specific split) into
    // output/cache directories.
    Huggingface {
        dataset: String,
        #[arg(long)]
        split: Option<String>,
        #[arg(long)]
        output: Option<PathBuf>,
        #[arg(long)]
        cache_dir: Option<PathBuf>,
    },
}
90
// Sub-actions of `hlx caption`. `//` comments only (doc comments would
// become clap help text).
#[derive(Subcommand)]
enum CaptionAction {
    // Process caption files, optionally driven by a config file.
    Process {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        config: Option<PathBuf>,
    },
    // e621-specific caption processing with optional tag filtering.
    E621 {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        filter_tags: bool,
        #[arg(long)]
        format: Option<String>,
    },
    // Convert a single caption file to another format.
    Convert {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        format: Option<String>,
    },
}
117
// Sub-actions of `hlx json`. `//` comments only (doc comments would become
// clap help text).
#[derive(Subcommand)]
enum JsonAction {
    // Format JSON files; --check verifies without rewriting.
    Format {
        files: Vec<PathBuf>,
        #[arg(long)]
        check: bool,
    },
    // Validate JSON files, optionally against a schema file.
    Validate {
        files: Vec<PathBuf>,
        #[arg(long)]
        schema: Option<PathBuf>,
    },
    // Extract metadata from JSON files.
    Metadata {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
    },
    // Split one JSON file into parts.
    Split {
        file: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
    },
    // Merge several JSON files into one; output is required here.
    Merge {
        files: Vec<PathBuf>,
        #[arg(short, long)]
        output: PathBuf,
    },
}
// All top-level `hlx` subcommands, dispatched in `run()`. `//` comments only
// — clap turns doc comments into --help text, which would change runtime
// output.
#[derive(Subcommand)]
enum Commands {
    // Compile a .hlx source file into a .hlxb binary.
    Compile {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short, long)]
        compress: bool,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
        #[arg(long)]
        cache: bool,
    },
    // Turn a .hlxb binary back into .hlx source.
    Decompile { input: PathBuf, #[arg(short, long)] output: Option<PathBuf> },
    // Validate a .hlx or .hlxb file (no output produced).
    Validate { file: PathBuf, #[arg(short, long)] detailed: bool },
    // Bundle a directory of sources into one binary, with include/exclude
    // glob patterns and optional tree shaking.
    Bundle {
        directory: PathBuf,
        #[arg(short, long, default_value = "bundle.hlxb")]
        output: PathBuf,
        #[arg(short, long)]
        include: Vec<String>,
        #[arg(short = 'x', long)]
        exclude: Vec<String>,
        #[arg(long)]
        tree_shake: bool,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Print metadata about a compiled binary ("text", "json", or "yaml").
    Info {
        file: PathBuf,
        #[arg(short, long, default_value = "text")]
        format: String,
        #[arg(long)]
        symbols: bool,
        #[arg(long)]
        sections: bool,
    },
    // Watch a directory and recompile on change (currently stubbed).
    Watch {
        directory: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Compare two compiled binaries.
    Diff { file1: PathBuf, file2: PathBuf, #[arg(short, long)] detailed: bool },
    // Re-optimize an existing binary; note the default level is 3 here
    // (vs 2 for compile/build).
    Optimize {
        input: PathBuf,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "3")]
        level: u8,
    },
    // Scaffold a new project file from an embedded template.
    Init {
        #[arg(short, long)]
        name: Option<String>,
        #[arg(short, long)]
        dir: Option<PathBuf>,
        #[arg(short, long, default_value = "minimal")]
        template: String,
        #[arg(short, long)]
        force: bool,
    },
    // Install project dependencies / tooling.
    Install {
        #[arg(long)]
        local_only: bool,
        #[arg(short, long)]
        force: bool,
        #[arg(short, long)]
        verbose: bool,
    },
    // Dependency management.
    Add {
        dependency: String,
        #[arg(short, long)]
        version: Option<String>,
        #[arg(long)]
        dev: bool,
    },
    Remove { dependency: String, #[arg(long)] dev: bool },
    // Project hygiene.
    Clean { #[arg(long)] all: bool, #[arg(long)] cache: bool },
    Reset { #[arg(short, long)] force: bool },
    // Build the current project (input optional — handler picks a default).
    Build {
        input: Option<PathBuf>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
        #[arg(short, long)]
        compress: bool,
        #[arg(long)]
        cache: bool,
    },
    // Build and execute, forwarding trailing args to the program.
    Run {
        input: Option<PathBuf>,
        args: Vec<String>,
        #[arg(short = 'O', long, default_value = "2")]
        optimize: u8,
    },
    // Test / benchmark runners.
    Test { #[arg(short, long)] pattern: Option<String>, #[arg(long)] integration: bool },
    Bench {
        #[arg(short, long)]
        pattern: Option<String>,
        #[arg(short, long)]
        iterations: Option<usize>,
    },
    // Start the HELIX file server; each flag overrides a ServerConfig field.
    Serve {
        #[arg(short, long)]
        port: Option<u16>,
        #[arg(long)]
        domain: Option<String>,
        #[arg(short, long)]
        directory: Option<PathBuf>,
        #[arg(long)]
        no_convert: bool,
        #[arg(long)]
        cache_timeout: Option<u64>,
        #[arg(long)]
        max_file_size: Option<u64>,
    },
    // Source formatting / linting.
    Fmt { files: Vec<PathBuf>, #[arg(long)] check: bool },
    Lint { files: Vec<PathBuf> },
    // Generate code from a named template.
    Generate {
        template: String,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(short, long)]
        name: Option<String>,
        #[arg(short, long)]
        force: bool,
    },
    // Publish to a registry; --dry-run skips the actual upload.
    Publish {
        #[arg(short, long)]
        registry: Option<String>,
        #[arg(short, long)]
        token: Option<String>,
        #[arg(long)]
        dry_run: bool,
    },
    // Sign (or, with --verify, verify) a compiled binary.
    Sign {
        input: PathBuf,
        #[arg(short, long)]
        key: Option<String>,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        verify: bool,
    },
    // Project import/export in a named interchange format.
    Export {
        format: String,
        #[arg(short, long)]
        output: Option<PathBuf>,
        #[arg(long)]
        include_deps: bool,
    },
    Import {
        input: PathBuf,
        #[arg(short, long)]
        format: Option<String>,
        #[arg(short, long)]
        force: bool,
    },
    // Stringly-typed action verbs; the handlers `.parse()` them into enums.
    Config { action: String, key: Option<String>, value: Option<String> },
    Cache { action: String },
    // Environment diagnostics.
    Doctor,
    // Serve the project itself (distinct from the file server `Serve`).
    ServeProject {
        #[arg(short, long)]
        port: Option<u16>,
        #[arg(long)]
        host: Option<String>,
        #[arg(short, long)]
        directory: Option<PathBuf>,
    },
    Workflow { #[command(subcommand)] action: WorkflowAction },
    // HLX-AI Commands for intelligent dataset processing
    Dataset {
        #[command(subcommand)]
        action: DatasetAction,
    },
    // Concatenate caption/tag files in a directory according to a preset.
    Concat {
        directory: PathBuf,
        #[arg(short, long, default_value = "caption+wd+tags")]
        preset: String,
        #[arg(short, long)]
        output_dir: Option<PathBuf>,
        #[arg(long)]
        dry_run: bool,
        #[arg(long)]
        deduplicate: bool,
    },
    Caption {
        #[command(subcommand)]
        action: CaptionAction,
    },
    Json {
        #[command(subcommand)]
        action: JsonAction,
    },
}
// CLI entry point: parse the command line and dispatch to the matching
// handler. The global --verbose flag is threaded into every handler; for
// commands that also have their own verbose/detailed flag the two are OR-ed.
// Only the Dataset/Caption/Json handlers are async; everything else is
// synchronous and returns its Result directly.
pub async fn run() -> Result<(), Box<dyn std::error::Error>> {
    let cli = Cli::parse();
    match cli.command {
        Commands::Compile { input, output, compress, optimize, cache } => {
            compile_command(input, output, compress, optimize, cache, cli.verbose)
        }
        Commands::Decompile { input, output } => {
            decompile_command(input, output, cli.verbose)
        }
        Commands::Validate { file, detailed } => {
            validate_command(file, detailed || cli.verbose)
        }
        Commands::Bundle {
            directory,
            output,
            include,
            exclude,
            tree_shake,
            optimize,
        } => {
            bundle_command(
                directory,
                output,
                include,
                exclude,
                tree_shake,
                optimize,
                cli.verbose,
            )
        }
        Commands::Info { file, format, symbols, sections } => {
            info_command(file, format, symbols, sections, cli.verbose)
        }
        Commands::Watch { directory, output, optimize } => {
            watch_command(directory, output, optimize, cli.verbose)
        }
        Commands::Diff { file1, file2, detailed } => {
            diff_command(file1, file2, detailed || cli.verbose)
        }
        Commands::Optimize { input, output, level } => {
            optimize_command(input, output, level, cli.verbose)
        }
        // NOTE: argument order here (template, dir, name, ...) differs from
        // the declaration order of the Init fields.
        Commands::Init { name, dir, template, force } => {
            init_command(template, dir, name, force, cli.verbose)?;
            Ok(())
        }
        Commands::Install { local_only, force, verbose } => {
            install_command(local_only, force, verbose || cli.verbose)
        }
        Commands::Add { dependency, version, dev } => {
            add_dependency(dependency, version, dev, cli.verbose)?;
            Ok(())
        }
        Commands::Remove { dependency, dev } => {
            remove_dependency(dependency, dev, cli.verbose)?;
            Ok(())
        }
        Commands::Clean { all, cache } => {
            clean_project(all, cache, cli.verbose)?;
            Ok(())
        }
        Commands::Reset { force } => {
            reset_project(force, cli.verbose)?;
            Ok(())
        }
        Commands::Build { input, output, optimize, compress, cache } => {
            build_project(input, output, optimize, compress, cache, cli.verbose)
        }
        Commands::Run { input, args, optimize } => {
            run_project(input, args, optimize, cli.verbose)?;
            Ok(())
        }
        Commands::Test { pattern, integration } => {
            run_tests(pattern, cli.verbose, integration)?;
            Ok(())
        }
        Commands::Bench { pattern, iterations } => {
            run_benchmarks(pattern, iterations, cli.verbose)?;
            Ok(())
        }
        // Serve: start from ServerConfig::default() and overlay only the
        // flags the user actually supplied.
        Commands::Serve {
            port,
            domain,
            directory,
            no_convert,
            cache_timeout,
            max_file_size,
        } => {
            let mut config = ServerConfig::default();
            if let Some(p) = port {
                config.port = p;
            }
            if let Some(d) = domain {
                config.domain = d;
            }
            if let Some(dir) = directory {
                config.root_directory = dir;
            }
            // --no-convert is a negative flag; config stores the positive.
            config.auto_convert = !no_convert;
            if let Some(ct) = cache_timeout {
                config.cache_timeout = ct;
            }
            if let Some(mfs) = max_file_size {
                config.max_file_size = mfs;
            }
            config.verbose = cli.verbose;
            start_server(config)?;
            Ok(())
        }
        Commands::Fmt { files, check } => {
            format_files(files, check, cli.verbose)?;
            Ok(())
        }
        Commands::Lint { files } => {
            lint_files(files, cli.verbose)?;
            Ok(())
        }
        Commands::Generate { template, output, name, force } => {
            generate_code(template, output, name, force, cli.verbose)?;
            Ok(())
        }
        Commands::Publish { registry, token, dry_run } => {
            publish_project(registry, token, dry_run, cli.verbose)?;
            Ok(())
        }
        Commands::Sign { input, key, output, verify } => {
            sign_binary(input, key, output, verify, cli.verbose)?;
            Ok(())
        }
        Commands::Export { format, output, include_deps } => {
            export_project(format, output, include_deps, cli.verbose)?;
            Ok(())
        }
        Commands::Import { input, format, force } => {
            import_project(input, format, force, cli.verbose)?;
            Ok(())
        }
        // Config/Cache take free-form action strings; `.parse()?` converts
        // them into the handlers' action enums and rejects unknown verbs.
        Commands::Config { action, key, value } => {
            manage_config(action.parse()?, key, value, cli.verbose)?;
            Ok(())
        }
        Commands::Cache { action } => {
            manage_cache(action.parse()?, cli.verbose)?;
            Ok(())
        }
        Commands::Doctor => {
            run_diagnostics(cli.verbose)?;
            Ok(())
        }
        Commands::ServeProject { port, host, directory } => {
            Ok(serve_project(port, host, directory, cli.verbose)?)
        }
        // The HLX-AI handlers are async.
        Commands::Dataset { action } => {
            dataset_command(action, cli.verbose).await
        }
        Commands::Concat { directory, preset, output_dir, dry_run, deduplicate } => {
            concat_command(directory, preset, output_dir, dry_run, deduplicate, cli.verbose)
        }
        Commands::Caption { action } => {
            caption_command(action, cli.verbose).await
        }
        Commands::Json { action } => {
            json_command(action, cli.verbose).await
        }
        // Workflow fans out to its own sub-action dispatch; Watch reuses the
        // top-level watch_command.
        Commands::Workflow { action } => {
            match action {
                WorkflowAction::Watch { directory, output, optimize } => {
                    watch_command(directory, output, optimize, cli.verbose)
                }
                WorkflowAction::Start { directory, output } => {
                    Ok(start_hot_reload(directory, output, cli.verbose)?)
                }
                WorkflowAction::Stop => Ok(stop_hot_reload(cli.verbose)?),
                WorkflowAction::Status => Ok(get_workflow_status(cli.verbose)?),
                WorkflowAction::List => Ok(list_workflows(cli.verbose)?),
                WorkflowAction::Pause { workflow_id } => {
                    Ok(pause_workflow(workflow_id, cli.verbose)?)
                }
                WorkflowAction::Resume { workflow_id } => {
                    Ok(resume_workflow(workflow_id, cli.verbose)?)
                }
                WorkflowAction::Kill { workflow_id } => {
                    Ok(stop_workflow(workflow_id, cli.verbose)?)
                }
            }
        }
    }
}
531fn compile_command(
532    input: PathBuf,
533    output: Option<PathBuf>,
534    compress: bool,
535    optimize: u8,
536    cache: bool,
537    verbose: bool,
538) -> Result<(), Box<dyn std::error::Error>> {
539    let output_path = output
540        .unwrap_or_else(|| {
541            let mut path = input.clone();
542            path.set_extension("hlxb");
543            path
544        });
545    if verbose {
546        println!("📦 Compiling: {}", input.display());
547        println!("  Optimization: Level {}", optimize);
548        println!("  Compression: {}", if compress { "Enabled" } else { "Disabled" });
549        println!("  Cache: {}", if cache { "Enabled" } else { "Disabled" });
550    }
551    let compiler = Compiler::builder()
552        .optimization_level(OptimizationLevel::from(optimize))
553        .compression(compress)
554        .cache(cache)
555        .verbose(verbose)
556        .build();
557    let binary = compiler.compile_file(&input)?;
558    let serializer = crate::compiler::serializer::BinarySerializer::new(compress);
559    serializer.write_to_file(&binary, &output_path)?;
560    println!("✅ Compiled successfully: {}", output_path.display());
561    println!("  Size: {} bytes", binary.size());
562    if verbose {
563        let stats = binary.symbol_table.stats();
564        println!(
565            "  Strings: {} (unique: {})", stats.total_strings, stats.unique_strings
566        );
567        println!("  Agents: {}", stats.agents);
568        println!("  Workflows: {}", stats.workflows);
569    }
570    Ok(())
571}
572fn decompile_command(
573    input: PathBuf,
574    output: Option<PathBuf>,
575    verbose: bool,
576) -> Result<(), Box<dyn std::error::Error>> {
577    let output_path = output
578        .unwrap_or_else(|| {
579            let mut path = input.clone();
580            path.set_extension("hlx");
581            path
582        });
583    if verbose {
584        println!("🔄 Decompiling: {}", input.display());
585    }
586    let loader = BinaryLoader::new();
587    let binary = loader.load_file(&input)?;
588    let compiler = Compiler::new(OptimizationLevel::Zero);
589    let source = compiler.decompile(&binary)?;
590    std::fs::write(&output_path, source)?;
591    println!("✅ Decompiled successfully: {}", output_path.display());
592    Ok(())
593}
594fn validate_command(
595    file: PathBuf,
596    detailed: bool,
597) -> Result<(), Box<dyn std::error::Error>> {
598    let extension = file.extension().and_then(|s| s.to_str());
599    match extension {
600        Some("hlx") => {
601            let source = std::fs::read_to_string(&file)?;
602            let ast = crate::parse(&source)?;
603            crate::validate(&ast)?;
604            println!("✅ Valid HELIX file: {}", file.display());
605            if detailed {
606                println!("  Declarations: {}", ast.declarations.len());
607            }
608        }
609        Some("hlxb") => {
610            let loader = BinaryLoader::new();
611            let binary = loader.load_file(&file)?;
612            binary.validate()?;
613            println!("✅ Valid HLXB file: {}", file.display());
614            if detailed {
615                println!("  Version: {}", binary.version);
616                println!("  Sections: {}", binary.data_sections.len());
617                println!("  Checksum: {:x}", binary.checksum);
618            }
619        }
620        _ => {
621            return Err("Unknown file type (expected .hlx or .hlxb)".into());
622        }
623    }
624    Ok(())
625}
626fn bundle_command(
627    directory: PathBuf,
628    output: PathBuf,
629    include: Vec<String>,
630    exclude: Vec<String>,
631    tree_shake: bool,
632    optimize: u8,
633    verbose: bool,
634) -> Result<(), Box<dyn std::error::Error>> {
635    if verbose {
636        println!("📦 Bundling directory: {}", directory.display());
637        if !include.is_empty() {
638            println!("  Include patterns: {:?}", include);
639        }
640        if !exclude.is_empty() {
641            println!("  Exclude patterns: {:?}", exclude);
642        }
643        println!("  Tree shaking: {}", if tree_shake { "Enabled" } else { "Disabled" });
644    }
645    let mut bundler = Bundler::new().with_tree_shaking(tree_shake).verbose(verbose);
646    for pattern in include {
647        bundler = bundler.include(&pattern);
648    }
649    for pattern in exclude {
650        bundler = bundler.exclude(&pattern);
651    }
652    let binary = bundler
653        .bundle_directory(&directory, OptimizationLevel::from(optimize))?;
654    let serializer = crate::compiler::serializer::BinarySerializer::new(true);
655    serializer.write_to_file(&binary, &output)?;
656    println!("✅ Bundle created: {}", output.display());
657    println!("  Size: {} bytes", binary.size());
658    if let Some(file_count) = binary.metadata.extra.get("bundle_files") {
659        println!("  Files bundled: {}", file_count);
660    }
661    Ok(())
662}
663fn info_command(
664    file: PathBuf,
665    format: String,
666    symbols: bool,
667    sections: bool,
668    verbose: bool,
669) -> Result<(), Box<dyn std::error::Error>> {
670    let loader = BinaryLoader::new();
671    let binary = loader.load_file(&file)?;
672    match format.as_str() {
673        "json" => {
674            let json = serde_json::to_string_pretty(&binary.metadata)?;
675            println!("{}", json);
676        }
677        "yaml" => {
678            println!("YAML output not yet implemented");
679        }
680        "text" | _ => {
681            println!("HELIX Binary Information");
682            println!("=======================");
683            println!("File: {}", file.display());
684            println!("Version: {}", binary.version);
685            println!("Compiler: {}", binary.metadata.compiler_version);
686            println!("Platform: {}", binary.metadata.platform);
687            println!("Created: {}", binary.metadata.created_at);
688            println!("Optimization: Level {}", binary.metadata.optimization_level);
689            println!("Compressed: {}", binary.flags.compressed);
690            println!("Size: {} bytes", binary.size());
691            println!("Checksum: {:x}", binary.checksum);
692            if let Some(source) = &binary.metadata.source_path {
693                println!("Source: {}", source);
694            }
695            if symbols || verbose {
696                println!("\nSymbol Table:");
697                let stats = binary.symbol_table.stats();
698                println!(
699                    "  Strings: {} (unique: {})", stats.total_strings, stats
700                    .unique_strings
701                );
702                println!("  Total bytes: {}", stats.total_bytes);
703                println!("  Agents: {}", stats.agents);
704                println!("  Workflows: {}", stats.workflows);
705                println!("  Contexts: {}", stats.contexts);
706                println!("  Crews: {}", stats.crews);
707            }
708            if sections || verbose {
709                println!("\nData Sections:");
710                for (i, section) in binary.data_sections.iter().enumerate() {
711                    println!("  [{}] {:?}", i, section.section_type);
712                    println!("      Size: {} bytes", section.size);
713                    if let Some(compression) = &section.compression {
714                        println!("      Compression: {:?}", compression);
715                    }
716                }
717            }
718        }
719    }
720    Ok(())
721}
// Placeholder for watch mode: announces the watched directory and returns.
// The output/optimize/verbose parameters are accepted (so the CLI surface is
// stable) but unused until the actual file-watching loop is implemented.
fn watch_command(
    directory: PathBuf,
    _output: Option<PathBuf>,
    _optimize: u8,
    _verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    // One write, same bytes as the original three println! calls.
    println!(
        "👀 Watching directory: {}\n  Press Ctrl+C to stop\nWatch mode not yet implemented",
        directory.display()
    );
    Ok(())
}
733fn diff_command(
734    file1: PathBuf,
735    file2: PathBuf,
736    detailed: bool,
737) -> Result<(), Box<dyn std::error::Error>> {
738    let loader = BinaryLoader::new();
739    let binary1 = loader.load_file(&file1)?;
740    let binary2 = loader.load_file(&file2)?;
741    println!("Comparing binaries:");
742    println!("  File 1: {}", file1.display());
743    println!("  File 2: {}", file2.display());
744    println!();
745    if binary1.version != binary2.version {
746        println!("⚠️  Version differs: {} vs {}", binary1.version, binary2.version);
747    }
748    if binary1.size() != binary2.size() {
749        println!("⚠️  Size differs: {} vs {} bytes", binary1.size(), binary2.size());
750    }
751    let stats1 = binary1.symbol_table.stats();
752    let stats2 = binary2.symbol_table.stats();
753    if stats1.total_strings != stats2.total_strings {
754        println!(
755            "⚠️  String count differs: {} vs {}", stats1.total_strings, stats2
756            .total_strings
757        );
758    }
759    if detailed {}
760    Ok(())
761}
762fn optimize_command(
763    input: PathBuf,
764    output: Option<PathBuf>,
765    level: u8,
766    verbose: bool,
767) -> Result<(), Box<dyn std::error::Error>> {
768    let output_path = output.unwrap_or_else(|| input.clone());
769    if verbose {
770        println!("⚡ Optimizing: {}", input.display());
771        println!("  Level: {}", level);
772    }
773    let loader = BinaryLoader::new();
774    let binary = loader.load_file(&input)?;
775    let serializer = crate::compiler::serializer::BinarySerializer::new(false);
776    let mut ir = serializer.deserialize_to_ir(&binary)?;
777    let mut optimizer = crate::compiler::optimizer::Optimizer::new(
778        OptimizationLevel::from(level),
779    );
780    optimizer.optimize(&mut ir);
781    let optimized_binary = serializer.serialize(ir, None)?;
782    serializer.write_to_file(&optimized_binary, &output_path)?;
783    println!("✅ Optimized successfully: {}", output_path.display());
784    if verbose {
785        let stats = optimizer.stats();
786        println!("\nOptimization Results:");
787        println!("{}", stats.report());
788    }
789    Ok(())
790}
// Project templates compiled into the binary as (name, file-contents) pairs,
// consumed by `init_command` to scaffold a new .hlx file. NOTE(review): the
// "ai-dev", "data-pipeline", and "research" entries are one-line placeholders
// despite their "full content embedded" text — confirm before shipping.
const EMBEDDED_TEMPLATES: &[(&str, &str)] = &[
    // Smallest valid configuration: one project, one agent, one workflow.
    ("minimal", r#"# Minimal MSO Configuration Example
# Demonstrates the simplest valid MSO file

project "minimal-example" {
    version = "0.1.0"
    author = "Example"
}

agent "simple-assistant" {
    model = "gpt-3.5-turbo"
    role = "Assistant"
    temperature = 0.7
}

workflow "basic-task" {
    trigger = "manual"

    step "process" {
        agent = "simple-assistant"
        task = "Process user request"
        timeout = 5m
    }
}"#),
    // Placeholder body — see NOTE above.
    ("ai-dev", "# AI Development Team template - full content embedded"),
    // Full customer-support example: two agents, a multi-step workflow with
    // dependencies and retries, a crew, memory, and a production context.
    ("support", r#"# Customer Support AI Configuration
# AI-powered customer service system

project "customer-support-system" {
    version = "2.0.0"
    author = "Support Team"
    description = "AI-driven customer support with multi-channel capabilities"
}

agent "support-specialist" {
    model = "claude-3-sonnet"
    role = "Customer Support Specialist"
    temperature = 0.7
    max_tokens = 100000

    capabilities [
        "customer-service"
        "problem-solving"
        "empathy"
        "multi-language"
        "escalation-handling"
    ]

    backstory {
        8 years in customer support leadership
        Handled 100K+ customer interactions
        Expert in de-escalation techniques
        Trained support teams worldwide
    }

    tools = [
        "zendesk"
        "intercom"
        "slack"
        "email-client"
        "knowledge-base"
    ]
}

agent "technical-expert" {
    model = "gpt-4"
    role = "Technical Support Engineer"
    temperature = 0.6
    max_tokens = 80000

    capabilities [
        "technical-troubleshooting"
        "bug-analysis"
        "system-diagnostics"
        "code-review"
        "api-debugging"
    ]

    backstory {
        12 years in software engineering
        Specialized in distributed systems
        Published technical documentation
        Led incident response teams
    }

    tools = [
        "terminal"
        "database-client"
        "monitoring-tools"
        "api-tester"
        "log-analyzer"
    ]
}

workflow "customer-inquiry-handling" {
    trigger = "webhook"

    step "triage" {
        agent = "support-specialist"
        task = "Analyze customer inquiry and determine priority level"
        timeout = 5m
    }

    step "initial-response" {
        agent = "support-specialist"
        task = "Provide immediate acknowledgment and gather more details"
        timeout = 10m
        depends_on = ["triage"]
    }

    step "technical-analysis" {
        agent = "technical-expert"
        task = "Investigate technical aspects of the issue"
        timeout = 15m
        depends_on = ["triage"]

        retry {
            max_attempts = 2
            delay = 2m
            backoff = "exponential"
        }
    }

    step "resolution" {
        crew = ["support-specialist", "technical-expert"]
        task = "Develop and implement solution"
        timeout = 30m
        depends_on = ["initial-response", "technical-analysis"]
    }

    step "follow-up" {
        agent = "support-specialist"
        task = "Ensure customer satisfaction and document resolution"
        timeout = 10m
        depends_on = ["resolution"]
    }

    pipeline {
        triage -> initial-response -> technical-analysis -> resolution -> follow-up
    }
}

crew "support-team" {
    agents [
        "support-specialist"
        "technical-expert"
    ]

    process = "hierarchical"
    manager = "technical-expert"
    max_iterations = 5
    verbose = true
}

memory {
    provider = "redis"
    connection = "redis://localhost:6379"

    embeddings {
        model = "text-embedding-ada-002"
        dimensions = 1536
        batch_size = 50
    }

    cache_size = 5000
    persistence = false
}

context "production" {
    environment = "prod"
    debug = false
    max_tokens = 150000

    secrets {
        zendesk_token = $ZENDESK_API_TOKEN
        intercom_token = $INTERCOM_API_TOKEN
        slack_token = $SLACK_API_TOKEN
    }

    variables {
        support_email = "support@company.com"
        response_timeout = 4h
        escalation_threshold = 24h
        max_concurrent_tickets = 50
    }
}"#),
    // Placeholder bodies — see NOTE above.
    ("data-pipeline", "# Data Pipeline template - full content embedded"),
    ("research", "# Research Assistant template - full content embedded"),
];
980fn init_command(
981    template: String,
982    dir: Option<PathBuf>,
983    name: Option<String>,
984    force: bool,
985    verbose: bool,
986) -> Result<(), Box<dyn std::error::Error>> {
987    let template_content = EMBEDDED_TEMPLATES
988        .iter()
989        .find(|(t, _)| t == &template)
990        .map(|(_, content)| *content)
991        .ok_or_else(|| {
992            let available: Vec<&str> = EMBEDDED_TEMPLATES
993                .iter()
994                .map(|(name, _)| *name)
995                .collect();
996            format!(
997                "Unknown template '{}'. Available templates: {}", template, available
998                .join(", ")
999            )
1000        })?;
1001    let output_dir = dir
1002        .unwrap_or_else(|| {
1003            std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))
1004        });
1005    let filename = name
1006        .unwrap_or_else(|| {
1007            match template.as_str() {
1008                "ai-dev" => "ai_development_team.hlx".to_string(),
1009                "data-pipeline" => "data_pipeline.hlx".to_string(),
1010                _ => format!("{}.hlx", template),
1011            }
1012        });
1013    let output_path = output_dir.join(&filename);
1014    if output_path.exists() && !force {
1015        return Err(anyhow::anyhow!(
1016            "File '{}' already exists. Use --force to overwrite.", output_path
1017            .display()
1018        ).into());
1019    }
1020    if verbose {
1021        println!("🚀 Initializing HELIX project:");
1022        println!("  Template: {}", template);
1023        println!("  Output: {}", output_path.display());
1024        println!("  Force: {}", force);
1025    }
1026    if let Some(parent) = output_path.parent() {
1027        std::fs::create_dir_all(parent)?;
1028    }
1029    std::fs::write(&output_path, template_content)?;
1030    println!("✅ HELIX project initialized successfully!");
1031    println!("  Created: {}", output_path.display());
1032    println!("  Template: {}", template);
1033    if verbose {
1034        let content_size = template_content.len();
1035        println!("  Size: {} bytes", content_size);
1036        let description = match template.as_str() {
1037            "minimal" => "Simple hlx configuration with basic agent and workflow",
1038            "ai-dev" => {
1039                "Complete AI development team with specialized agents for full-stack development"
1040            }
1041            "support" => {
1042                "Multi-tier customer support system with escalation and knowledge management"
1043            }
1044            "data-pipeline" => {
1045                "High-throughput data processing pipeline with ML integration"
1046            }
1047            "research" => {
1048                "AI-powered research assistant for literature review and paper writing"
1049            }
1050            _ => "HELIX configuration template",
1051        };
1052        println!("  Description: {}", description);
1053    }
1054    println!("\n📋 Next steps:");
1055    println!("  1. Review and customize the configuration");
1056    println!("  2. Set up your API keys and environment variables");
1057    println!("  3. Compile with: helix compile {}", filename);
1058    println!("  4. Run with your hlx runtime");
1059    Ok(())
1060}
/// Install the currently running `hlx` binary under `~/.baton/bin` and,
/// unless `local_only` is set, attempt to expose it globally through a
/// symlink (or copy, on Windows) in a well-known bin directory.
///
/// Refuses to overwrite an existing install or link unless `force` is given.
/// Local install failures are returned as errors; global link failures are
/// reported and fall back to printing PATH instructions.
fn install_command(
    local_only: bool,
    force: bool,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    if verbose {
        println!("🔧 Installing Helix compiler globally...");
    }
    let current_exe = std::env::current_exe()
        .map_err(|e| format!("Failed to get current executable path: {}", e))?;
    if verbose {
        println!("  Source: {}", current_exe.display());
    }
    // HOME is typically unset on stock Windows, yet this function has an
    // explicit #[cfg(windows)] install path below — fall back to USERPROFILE
    // so installation works there too.
    let home_dir = std::env::var("HOME")
        .or_else(|_| std::env::var("USERPROFILE"))
        .map_err(|e| format!("Failed to get HOME directory: {}", e))?;
    let baton_dir = PathBuf::from(&home_dir).join(".baton");
    let baton_bin_dir = baton_dir.join("bin");
    let target_binary = baton_bin_dir.join("hlx");
    if verbose {
        println!("  Target: {}", target_binary.display());
    }
    std::fs::create_dir_all(&baton_bin_dir)
        .map_err(|e| {
            format!("Failed to create directory {}: {}", baton_bin_dir.display(), e)
        })?;
    if verbose {
        println!("  ✅ Created directory: {}", baton_bin_dir.display());
    }
    if target_binary.exists() && !force {
        return Err(
            format!(
                "HELIX compiler already installed at {}. Use --force to overwrite.",
                target_binary.display()
            )
                .into(),
        );
    }
    std::fs::copy(&current_exe, &target_binary)
        .map_err(|e| {
            format!("Failed to copy binary to {}: {}", target_binary.display(), e)
        })?;
    #[cfg(unix)]
    {
        // Copied files do not keep the execute bit; restore rwxr-xr-x.
        use std::os::unix::fs::PermissionsExt;
        let mut perms = std::fs::metadata(&target_binary)?.permissions();
        perms.set_mode(0o755);
        std::fs::set_permissions(&target_binary, perms)?;
    }
    if verbose {
        println!("  ✅ Copied binary to: {}", target_binary.display());
    }
    println!("✅ Helix compiler installed successfully!");
    println!("  Location: {}", target_binary.display());
    if local_only {
        println!("\n📋 Local installation complete!");
        println!("  Add {} to your PATH to use 'hlx' command", baton_bin_dir.display());
        println!("  Or run: export PATH=\"{}:$PATH\"", baton_bin_dir.display());
        return Ok(());
    }
    // Candidate directories for a global link, in order of preference.
    let global_bin_paths = vec![
        PathBuf::from("/usr/local/bin"), PathBuf::from("/usr/bin"),
        PathBuf::from("/opt/homebrew/bin"),
        PathBuf::from("/home/linuxbrew/.linuxbrew/bin"),
    ];
    let mut symlink_created = false;
    for global_bin in global_bin_paths {
        if global_bin.exists() && global_bin.is_dir() {
            let symlink_path = global_bin.join("hlx");
            if symlink_path.exists() && !force {
                if verbose {
                    println!(
                        "  ⚠️  Symlink already exists: {}", symlink_path.display()
                    );
                }
                continue;
            }
            if symlink_path.exists() {
                std::fs::remove_file(&symlink_path)
                    .map_err(|e| {
                        format!(
                            "Failed to remove existing symlink {}: {}", symlink_path
                            .display(), e
                        )
                    })?;
            }
            #[cfg(unix)]
            let symlink_result = std::os::unix::fs::symlink(&target_binary, &symlink_path);

            #[cfg(windows)]
            let symlink_result = {
                // On Windows, prefer a plain copy (no privileges needed);
                // fall back to a symlink only if the copy fails.
                std::fs::copy(&target_binary, &symlink_path)
                    .map(|_| ())
                    .or_else(|_| std::os::windows::fs::symlink_file(&target_binary, &symlink_path))
            };

            #[cfg(not(any(unix, windows)))]
            let symlink_result = std::fs::copy(&target_binary, &symlink_path).map(|_| ());

            match symlink_result {
                Ok(_) => {
                    println!("  ✅ Created global link: {}", symlink_path.display());
                    symlink_created = true;
                    break;
                }
                Err(e) => {
                    // Likely a permissions problem; try the next candidate dir.
                    if verbose {
                        println!(
                            "  ⚠️  Failed to create link at {}: {}", symlink_path
                            .display(), e
                        );
                    }
                    continue;
                }
            }
        }
    }
    if symlink_created {
        println!("\n🎉 Global installation complete!");
        println!("  You can now use 'hlx' command from anywhere");
        println!("  Try: hlx --help");
    } else {
        println!("\n📋 Installation complete, but global symlink creation failed");
        println!("  This might be due to insufficient permissions");
        println!(
            "  You can still use hlx by adding {} to your PATH", baton_bin_dir.display()
        );
        println!("  Or run: export PATH=\"{}:$PATH\"", baton_bin_dir.display());
        if verbose {
            println!("\n💡 To create global symlink manually:");
            println!("  sudo ln -sf {} /usr/local/bin/hlx", target_binary.display());
        }
    }
    Ok(())
}
1196fn build_project(
1197    input: Option<PathBuf>,
1198    output: Option<PathBuf>,
1199    optimize: u8,
1200    compress: bool,
1201    cache: bool,
1202    verbose: bool,
1203) -> Result<(), Box<dyn std::error::Error>> {
1204    let project_dir = find_project_root()?;
1205    let input_file = match input {
1206        Some(path) => path,
1207        None => {
1208            let main_file = project_dir.join("src").join("main.hlx");
1209            if main_file.exists() {
1210                main_file
1211            } else {
1212                return Err(
1213                    anyhow::anyhow!(
1214                        "No input file specified and no src/main.hlx found.\n\
1215                    Specify a file with: helix build <file.hlx>"
1216                    )
1217                        .into(),
1218                );
1219            }
1220        }
1221    };
1222    let output_file = output
1223        .unwrap_or_else(|| {
1224            let target_dir = project_dir.join("target");
1225            let input_stem = input_file
1226                .file_stem()
1227                .and_then(|s| s.to_str())
1228                .unwrap_or("output");
1229            target_dir.join(format!("{}.hlxb", input_stem))
1230        });
1231    if verbose {
1232        println!("🔨 Building HELIX project:");
1233        println!("  Input: {}", input_file.display());
1234        println!("  Output: {}", output_file.display());
1235        println!("  Optimization: Level {}", optimize);
1236        println!("  Compression: {}", if compress { "Enabled" } else { "Disabled" });
1237        println!("  Cache: {}", if cache { "Enabled" } else { "Disabled" });
1238    }
1239    if let Some(parent) = output_file.parent() {
1240        std::fs::create_dir_all(parent)?;
1241    }
1242    let compiler = Compiler::builder()
1243        .optimization_level(OptimizationLevel::from(optimize))
1244        .compression(compress)
1245        .cache(cache)
1246        .verbose(verbose)
1247        .build();
1248    let binary = compiler.compile_file(&input_file)?;
1249    let serializer = crate::compiler::serializer::BinarySerializer::new(compress);
1250    serializer.write_to_file(&binary, &output_file)?;
1251    println!("✅ Build completed successfully!");
1252    println!("  Output: {}", output_file.display());
1253    println!("  Size: {} bytes", binary.size());
1254    if verbose {
1255        let stats = binary.symbol_table.stats();
1256        println!(
1257            "  Strings: {} (unique: {})", stats.total_strings, stats.unique_strings
1258        );
1259        println!("  Agents: {}", stats.agents);
1260        println!("  Workflows: {}", stats.workflows);
1261    }
1262    Ok(())
1263}
1264fn find_project_root() -> Result<PathBuf, Box<dyn std::error::Error>> {
1265    let mut current_dir = std::env::current_dir()
1266        .context("Failed to get current directory")?;
1267    loop {
1268        let manifest_path = current_dir.join("project.hlx");
1269        if manifest_path.exists() {
1270            return Ok(current_dir);
1271        }
1272        if let Some(parent) = current_dir.parent() {
1273            current_dir = parent.to_path_buf();
1274        } else {
1275            break;
1276        }
1277    }
1278    Err(anyhow::anyhow!("No HELIX project found. Run 'helix init' first.").into())
1279}
1280// HLX-AI Command Handlers
/// Dispatch a `dataset` subcommand.
///
/// Variants:
/// * `Process` — load each file, optionally run a quality assessment
///   (`validate`) and attempt conversion to a named algorithm format.
/// * `Analyze` — print per-file statistics, plus training-format details
///   when `detailed` is set.
/// * `Convert` — format-to-format conversion (currently a placeholder).
/// * `Quality` — score each dataset; `report` selects a full report over a
///   one-line summary.
/// * `Huggingface` — process a HuggingFace dataset and optionally write the
///   samples to `output` as pretty-printed JSON.
///
/// Dataset load/convert failures abort the command with an error; most other
/// per-item problems are printed and processing continues.
async fn dataset_command(
    action: DatasetAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        DatasetAction::Process { files, output, format, algorithm, validate } => {
            // NOTE(review): `output` and `format` are currently only echoed in
            // verbose mode — they do not influence processing below.
            if verbose {
                println!("🧠 Processing datasets with HLX-AI...");
                println!("  Files: {:?}", files);
                println!("  Output: {:?}", output);
                println!("  Format: {:?}", format);
                println!("  Algorithm: {:?}", algorithm);
                println!("  Validate: {}", validate);
            }

            // Use the HLX json core functionality
            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in &files {
                if verbose {
                    println!("📊 Processing: {}", file.display());
                }

                // A file that fails to load or convert aborts the whole run.
                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                let training_dataset = dataset.to_training_dataset()
                    .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                if validate {
                    // Quality issues are informational; they never fail the run.
                    let quality = training_dataset.quality_assessment();
                    println!("✅ Quality Score: {:.2}", quality.overall_score);
                    if !quality.issues.is_empty() {
                        println!("⚠️  Issues:");
                        for issue in &quality.issues {
                            println!("   - {}", issue);
                        }
                    }
                }

                if let Some(algo) = &algorithm {
                    // Algorithm-format conversion failure is reported, not fatal.
                    if training_dataset.to_algorithm_format(algo).is_ok() {
                        println!("✅ Converted to {} format", algo.to_uppercase());
                    } else {
                        println!("❌ Failed to convert to {} format", algo.to_uppercase());
                    }
                }

                println!("📈 Dataset stats: {} samples", training_dataset.samples.len());
            }

            println!("🎉 Dataset processing completed!");
            Ok(())
        }
        DatasetAction::Analyze { files, detailed } => {
            if verbose {
                println!("🔍 Analyzing datasets...");
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                if verbose {
                    println!("📊 Analyzing: {}", file.display());
                }

                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                println!("\n--- Dataset Analysis: {} ---", file.display());
                for (key, value) in dataset.stats() {
                    println!("{:15}: {}", key, value);
                }

                if detailed {
                    // The detailed view additionally converts to the training
                    // representation to report format-level statistics.
                    let training_dataset = dataset.to_training_dataset()
                        .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                    println!("\n--- Training Format Analysis ---");
                    println!("Format: {:?}", training_dataset.format);
                    println!("Samples: {}", training_dataset.samples.len());
                    println!("Avg Prompt Length: {:.1}", training_dataset.statistics.avg_prompt_length);
                    println!("Avg Completion Length: {:.1}", training_dataset.statistics.avg_completion_length);

                    println!("\n--- Field Coverage ---");
                    for (field, coverage) in &training_dataset.statistics.field_coverage {
                        println!("{:12}: {:.1}%", field, coverage * 100.0);
                    }
                }
            }

            Ok(())
        }
        DatasetAction::Convert { input, output: _output, from_format, to_format } => {
            if verbose {
                println!("🔄 Converting dataset format...");
                println!("  Input: {}", input.display());
                println!("  From: {}", from_format);
                println!("  To: {}", to_format);
            }

            // This would implement format conversion between different training formats
            println!("🔄 Format conversion: {} → {}", from_format, to_format);
            println!("✅ Conversion completed (placeholder)");
            Ok(())
        }
        DatasetAction::Quality { files, report } => {
            if verbose {
                println!("📊 Assessing dataset quality...");
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                let dataset = GenericJSONDataset::new(&[file.clone()], None, DataFormat::Auto)
                    .map_err(|e| format!("Failed to load dataset {}: {}", file.display(), e))?;

                let training_dataset = dataset.to_training_dataset()
                    .map_err(|e| format!("Failed to convert dataset {}: {}", file.display(), e))?;

                let quality = training_dataset.quality_assessment();

                if report {
                    // Full report: score, issue list, and recommendations.
                    println!("\n=== Quality Report: {} ===", file.display());
                    println!("Overall Score: {:.2}/1.0", quality.overall_score);
                    println!("\nIssues:");
                    if quality.issues.is_empty() {
                        println!("  ✅ No issues found");
                    } else {
                        for issue in &quality.issues {
                            println!("  ⚠️  {}", issue);
                        }
                    }
                    println!("\nRecommendations:");
                    for rec in &quality.recommendations {
                        println!("  💡 {}", rec);
                    }
                } else {
                    println!("📊 {}: Quality Score {:.2}", file.display(), quality.overall_score);
                }
            }

            Ok(())
        }
        DatasetAction::Huggingface { dataset, split, output, cache_dir } => {
            if verbose {
                println!("🤗 Loading HuggingFace dataset...");
                println!("  Split: {:?}", split.as_ref().unwrap_or(&"train".to_string()));
                println!("  Dataset: {}", dataset);
                println!("  Cache: {:?}", cache_dir);
                println!("  Output: {:?}", output);
            }

            // Use the HLX HuggingFace processor
            // Cache directory defaults to ./hf_cache when not specified.
            let processor = crate::json::HfProcessor::new(cache_dir.unwrap_or_else(|| PathBuf::from("./hf_cache")));

            let config = crate::json::HfDatasetConfig {
                source: dataset.clone(),
                split: split.unwrap_or_else(|| "train".to_string()),
                format: None,
                rpl_filter: None,
                revision: None,
                streaming: false,
                trust_remote_code: false,
                num_proc: None,
            };

            // Process the dataset
            match processor.process_dataset(&dataset, &config).await {
                Ok(training_dataset) => {
                    println!("✅ HuggingFace dataset loaded successfully");
                    println!("📊 Samples: {}", training_dataset.samples.len());
                    println!("📝 Format: {:?}", training_dataset.format);

                    // Save to output file if specified
                    if let Some(output_path) = output {
                        let json_output = serde_json::to_string_pretty(&training_dataset.samples)
                            .map_err(|e| format!("Failed to serialize output: {}", e))?;
                        std::fs::write(&output_path, json_output)
                            .map_err(|e| format!("Failed to write output file {}: {}", output_path.display(), e))?;
                        println!("💾 Saved processed dataset to: {}", output_path.display());
                    }
                }
                Err(e) => {
                    println!("❌ Failed to load HuggingFace dataset: {}", e);
                    return Err(e.into());
                }
            }

            Ok(())
        }
    }
}
1474
1475fn concat_command(
1476    directory: PathBuf,
1477    preset: String,
1478    output_dir: Option<PathBuf>,
1479    dry_run: bool,
1480    deduplicate: bool,
1481    verbose: bool,
1482) -> Result<(), Box<dyn std::error::Error>> {
1483    if verbose {
1484        println!("🔗 Concatenating files...");
1485        println!("  Directory: {}", directory.display());
1486        println!("  Preset: {}", preset);
1487        println!("  Output: {:?}", output_dir);
1488        println!("  Dry Run: {}", dry_run);
1489        println!("  Deduplicate: {}", deduplicate);
1490    }
1491
1492    use crate::json::concat::{ConcatConfig, FileExtensionPreset};
1493
1494    let config = match preset.as_str() {
1495        "caption+wd+tags" => ConcatConfig::from_preset(FileExtensionPreset::CaptionWdTags),
1496        "florence+wd+tags" => ConcatConfig::from_preset(FileExtensionPreset::FlorenceWdTags),
1497        _ => {
1498            return Err(format!("Unknown preset: {}. Use 'caption+wd+tags' or 'florence+wd+tags'", preset).into());
1499        }
1500    };
1501
1502    let _config = if deduplicate {
1503        config.with_deduplication(true)
1504    } else {
1505        config
1506    };
1507
1508    // This would be async in a real implementation
1509    println!("🔄 Concatenating files in: {}", directory.display());
1510    println!("📝 Using preset: {}", preset);
1511
1512    if dry_run {
1513        println!("🔍 Dry run mode - no files will be modified");
1514    }
1515
1516    println!("✅ Concatenation completed (placeholder)");
1517    Ok(())
1518}
1519
/// Dispatch a `caption` subcommand.
///
/// Variants:
/// * `Process` — run the generic caption processor over each file.
/// * `E621` — process E621 JSON files with tag filtering/formatting, copying
///   each processed file into `output` when one is given.
/// * `Convert` — caption format conversion (currently a placeholder).
///
/// Per-file failures are printed and processing continues; only filesystem
/// errors while preparing the output directory abort the command.
async fn caption_command(
    action: CaptionAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        CaptionAction::Process { files, output, config } => {
            // NOTE(review): `output` and `config` are only echoed in verbose
            // mode here — they do not affect `process_file` below.
            if verbose {
                println!("📝 Processing caption files...");
                println!("  Files: {:?}", files);
                println!("  Output: {:?}", output);
                println!("  Config: {:?}", config);
            }


            for file in files {
                if verbose {
                    println!("🎨 Processing: {}", file.display());
                }

                // Process caption file; a failure is reported but not fatal.
                match crate::json::caption::process_file(&file).await {
                    Ok(_) => println!("✅ Processed: {}", file.display()),
                    Err(e) => println!("❌ Failed to process {}: {}", file.display(), e),
                }
            }

            Ok(())
        }
        CaptionAction::E621 { files, output, filter_tags, format } => {
            if verbose {
                println!("🔞 Processing E621 captions...");
                println!("  Filter tags: {}", filter_tags);
                println!("  Format: {:?}", format);
                println!("  Output: {:?}", output);
            }

            use crate::json::caption::{E621Config, process_e621_json_file};

            // One shared config, cloned per file for the async processor.
            let config = E621Config::new()
                .with_filter_tags(filter_tags)
                .with_format(format);

            for file in files {
                if verbose {
                    println!("🎨 Processing E621: {}", file.display());
                }

                // Process E621 JSON file
                match process_e621_json_file(&file, Some(config.clone())).await {
                    Ok(_) => {
                        println!("✅ Processed E621 file: {}", file.display());
                        // If output is specified, copy processed file there
                        // NOTE(review): this copies the *input* path — assumes
                        // the processor rewrote it in place; confirm.
                        if let Some(output_path) = &output {
                            let file_name = file.file_name().unwrap_or_default();
                            let target_path = output_path.join(file_name);
                            if let Some(parent) = target_path.parent() {
                                std::fs::create_dir_all(parent)?;
                            }
                            match std::fs::copy(&file, &target_path) {
                                Ok(_) => println!("💾 Saved processed file to: {}", target_path.display()),
                                Err(e) => println!("⚠️  Failed to save to output: {}", e),
                            }
                        }
                    }
                    Err(e) => println!("❌ Failed to process E621 file {}: {}", file.display(), e),
                }
            }

            Ok(())
        }
        CaptionAction::Convert { input, output, format } => {
            if verbose {
                println!("🔄 Converting caption format...");
                println!("  Input: {}", input.display());
                println!("  Output: {:?}", output);
                println!("  Format: {:?}", format);
            }

            println!("🔄 Converting caption format (placeholder)");
            println!("✅ Conversion completed");
            Ok(())
        }
    }
}
1604
/// Dispatch a `json` subcommand.
///
/// Variants:
/// * `Format` — format files in place, or report formatting status when
///   `check` is set.
/// * `Validate` — parse each file (optionally against a schema directory)
///   and report validity.
/// * `Metadata` — extract metadata from `.safetensors` files; other files
///   are skipped with a warning.
/// * `Split` — split one file's content into tags/sentences, optionally
///   writing the result to `output` as JSON.
/// * `Merge` — merge multiple files into `output` via the core JSON runner.
///
/// Per-file failures are printed and processing continues; I/O and
/// serialization errors abort the command.
async fn json_command(
    action: JsonAction,
    verbose: bool,
) -> Result<(), Box<dyn std::error::Error>> {
    match action {
        JsonAction::Format { files, check } => {
            if verbose {
                println!("🎨 Formatting JSON files...");
                println!("  Check only: {}", check);
            }

            use crate::json::format_json_file;

            for file in files {
                if verbose {
                    println!("📝 Formatting: {}", file.display());
                }

                if check {
                    // Check if file is properly formatted
                    // NOTE(review): both branches call the same helper — whether
                    // `format_json_file` is truly check-only here needs confirming.
                    match format_json_file(file.clone()).await {
                        Ok(_) => println!("✅ {} is properly formatted", file.display()),
                        Err(e) => println!("❌ {} needs formatting: {}", file.display(), e),
                    }
                } else {
                    // Format the file
                    match format_json_file(file.clone()).await {
                        Ok(_) => println!("✅ Formatted: {}", file.display()),
                        Err(e) => println!("❌ Failed to format {}: {}", file.display(), e),
                    }
                }
            }

            Ok(())
        }
        JsonAction::Validate { files, schema } => {
            if verbose {
                println!("✅ Validating JSON files...");
                println!("  Schema: {:?}", schema);
            }

            use crate::json::core::{GenericJSONDataset, DataFormat};

            for file in files {
                if verbose {
                    println!("🔍 Validating: {}", file.display());
                }

                // Validation failure for one file does not stop the others.
                match GenericJSONDataset::new(&[file.clone()], schema.as_deref(), DataFormat::Auto) {
                    Ok(dataset) => {
                        println!("✅ {} is valid JSON", file.display());
                        if verbose {
                            println!("   Samples: {}", dataset.len());
                            println!("   Format: {:?}", dataset.format);
                        }
                    }
                    Err(e) => println!("❌ {} validation failed: {}", file.display(), e),
                }
            }

            Ok(())
        }
        JsonAction::Metadata { files, output } => {
            // NOTE(review): `output` is only echoed in verbose mode — metadata
            // extraction does not write to it below.
            if verbose {
                println!("📊 Extracting JSON metadata...");
                println!("  Output: {:?}", output);
            }

            use crate::json::process_safetensors_file;

            for file in files {
                // Only .safetensors files carry extractable metadata here.
                if file.extension().and_then(|s| s.to_str()) == Some("safetensors") {
                    if verbose {
                        println!("🔍 Processing SafeTensors: {}", file.display());
                    }

                    // Extract metadata from SafeTensors file
                    match process_safetensors_file(&file).await {
                        Ok(_) => println!("✅ Metadata extracted from: {}", file.display()),
                        Err(e) => println!("❌ Failed to extract metadata from {}: {}", file.display(), e),
                    }
                } else {
                    println!("⚠️  Skipping non-SafeTensors file: {}", file.display());
                }
            }

            Ok(())
        }
        JsonAction::Split { file, output } => {
            if verbose {
                println!("✂️  Splitting JSON file...");
                println!("  Input: {}", file.display());
                println!("  Output: {:?}", output);
            }

            use crate::json::split_content;

            // Read and split the JSON file content
            let content = tokio::fs::read_to_string(&file).await?;
            let (tags, sentences) = split_content(&content);
            println!("✅ Split {}: {} tags, {} sentences", file.display(), tags.len(), sentences.len());

            // Persist the split only when an output path was requested.
            if let Some(output_path) = output {
                let split_data = serde_json::json!({
                    "tags": tags,
                    "sentences": sentences
                });
                let json_output = serde_json::to_string_pretty(&split_data)
                    .map_err(|e| format!("Failed to serialize split data: {}", e))?;
                std::fs::write(&output_path, json_output)
                    .map_err(|e| format!("Failed to write split output to {}: {}", output_path.display(), e))?;
                println!("💾 Saved split data to: {}", output_path.display());
            }
            Ok(())
        }
        JsonAction::Merge { files, output } => {
            if verbose {
                println!("🔗 Merging JSON files...");
                println!("  Output: {}", output.display());
            }

            use crate::json::core::{run_json_cmd, JsonArgs};

            // Use the existing merge functionality
            let args = JsonArgs {
                data_dir: vec![],
                file: files.into_iter().map(|p| p.to_string_lossy().to_string()).collect(),
                schema_dir: None,
                format: crate::json::core::DataFormat::Auto,
                merge_output: Some(output),
                show_stats: verbose,
                // Fixed seed keeps merges reproducible — TODO confirm intent.
                seed: 42,
                multi_process: false,
                input_folder: None,
                output: None,
                jobs: num_cpus::get(),
            };

            // Run the JSON merge command
            match run_json_cmd(args).await {
                Ok(_) => println!("✅ Successfully merged JSON files"),
                Err(e) => println!("❌ Failed to merge JSON files: {}", e),
            }
            Ok(())
        }
    }
}
1752
#[cfg(test)]
mod tests {
    use super::*;

    /// The top-level CLI should accept a `compile` invocation carrying an
    /// optimization level and the compression flag.
    #[test]
    fn test_cli_parsing() {
        let args = vec!["helix", "compile", "test.hlx", "-O3", "--compress"];
        let parsed = Cli::try_parse_from(args);
        assert!(parsed.is_ok());
    }
}