context_creator/
cli.rs

1//! Command-line interface configuration and parsing
2
3use clap::{Parser, ValueEnum};
4use std::path::PathBuf;
5use tracing::debug;
6
/// Text appended after clap's generated `--help` output.
///
/// Covers two topics: how `[[priorities]]` rules from `.context-creator.toml`
/// are matched (first match wins), and a set of copy-pasteable usage examples.
/// Written as a raw string so the embedded double quotes need no escaping.
const AFTER_HELP_MSG: &str = r#"CUSTOM PRIORITY RULES:
  Custom priority rules are processed in a 'first-match-wins' basis. Rules are 
  evaluated in the order they are defined in your .context-creator.toml configuration 
  file. The first rule that matches a given file will be used, and all subsequent 
  rules will be ignored for that file.

  Example configuration:
    [[priorities]]
    pattern = "src/**/*.rs"
    weight = 10.0
    
    [[priorities]]  
    pattern = "tests/*"
    weight = -2.0

USAGE EXAMPLES:
  # Process current directory with a prompt
  context-creator --prompt "Analyze this code"
  
  # Process specific directories (positional arguments)
  context-creator src/ tests/ docs/
  
  # Process specific directories (explicit include flags)
  context-creator --include src/ --include tests/ --include docs/
  
  # Process files matching glob patterns (QUOTE patterns to prevent shell expansion)
  context-creator --include "**/*.py" --include "src/**/*.{rs,toml}"
  
  # Process specific file types across all directories
  context-creator --include "**/*repository*.py" --include "**/test[0-9].py"
  
  # Combine prompt with include patterns for targeted analysis
  context-creator --prompt "Review security" --include "src/auth/**" --include "src/security/**"
  
  # Use ignore patterns to exclude unwanted files
  context-creator --include "**/*.rs" --ignore "target/**" --ignore "**/*_test.rs"
  
  # Combine prompt with ignore patterns
  context-creator --prompt "Analyze core logic" --ignore "tests/**" --ignore "docs/**"
  
  # Process a GitHub repository
  context-creator --remote https://github.com/owner/repo
  
  # Read prompt from stdin
  echo "Review this code" | context-creator --stdin .
  
  # FLEXIBLE COMBINATIONS (NEW):
  # Combine prompt with specific directories
  context-creator --prompt "Security audit" src/auth/ src/security/
  
  # Combine prompt with GitHub repository
  context-creator --prompt "Find bugs" --remote https://github.com/owner/repo
  
  # Combine stdin with specific directories
  echo "Analyze patterns" | context-creator --stdin src/ tests/
  
  # Combine include patterns with GitHub repository
  context-creator --include "**/*.rs" --remote https://github.com/owner/repo
  
  # Combine stdin with include patterns
  echo "Review code" | context-creator --stdin --include "**/*.py"
"#;
71
/// Supported LLM CLI tools
// Selected on the command line through `Config::llm_tool` (`-t` / `--tool`).
// NOTE(review): clap's derive normally surfaces the `///` text on each variant
// as help for that possible value, so treat those lines as user-facing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum LlmTool {
    /// Use gemini (default)
    // `#[default]` makes this the value of `LlmTool::default()`.
    #[value(name = "gemini")]
    #[default]
    Gemini,
    /// Use codex CLI
    #[value(name = "codex")]
    Codex,
}
83
/// Log output format options
// Selected on the command line through `Config::log_format` (`--log-format`).
// NOTE(review): clap's derive normally surfaces the `///` text on each variant
// as help for that possible value, so treat those lines as user-facing.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum LogFormat {
    /// Human-readable plain text format (default)
    // `#[default]` makes this the value of `LogFormat::default()`.
    #[value(name = "plain")]
    #[default]
    Plain,
    /// Machine-readable JSON format
    #[value(name = "json")]
    Json,
}
95
96impl LlmTool {
97    /// Get the command name for the tool
98    pub fn command(&self) -> &'static str {
99        match self {
100            LlmTool::Gemini => "gemini",
101            LlmTool::Codex => "codex",
102        }
103    }
104
105    /// Get the installation instructions for the tool
106    pub fn install_instructions(&self) -> &'static str {
107        match self {
108            LlmTool::Gemini => "Please install gemini with: pip install gemini",
109            LlmTool::Codex => {
110                "Please install codex CLI from: https://github.com/microsoft/codex-cli"
111            }
112        }
113    }
114
115    /// Get the default maximum tokens for the tool
116    pub fn default_max_tokens(&self) -> usize {
117        match self {
118            LlmTool::Gemini => 1_000_000,
119            LlmTool::Codex => 1_000_000,
120        }
121    }
122
123    /// Get the default maximum tokens for the tool with optional config override
124    pub fn default_max_tokens_with_config(
125        &self,
126        config_token_limits: Option<&crate::config::TokenLimits>,
127    ) -> usize {
128        if let Some(token_limits) = config_token_limits {
129            match self {
130                LlmTool::Gemini => token_limits.gemini.unwrap_or(1_000_000),
131                LlmTool::Codex => token_limits.codex.unwrap_or(1_000_000),
132            }
133        } else {
134            self.default_max_tokens()
135        }
136    }
137}
138
/// High-performance CLI tool to convert codebases to Markdown for LLM context
// NOTE(review): with clap's derive, `///` comments on this struct and its fields
// can end up in the generated help output (notably for fields without an explicit
// `help = ...`). Treat them as user-facing text; maintainer notes go in `//` comments.
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, after_help = AFTER_HELP_MSG)]
pub struct Config {
    /// The prompt to send to the LLM for processing
    // Read through `get_prompt()`, which treats a blank/whitespace-only value as absent.
    #[arg(short = 'p', long = "prompt", help = "Process a text prompt directly")]
    pub prompt: Option<String>,

    /// One or more directory paths to process
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    // Positional argument (no `short`/`long`); `validate()` accepts files as well as directories.
    #[arg(value_name = "PATHS", help = "Process files and directories")]
    pub paths: Option<Vec<PathBuf>>,

    /// Include files and directories matching glob patterns
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    #[arg(
        long,
        help = "Include files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --include \"*.py\" --include \"src/**/*.{rs,toml}\""
    )]
    pub include: Option<Vec<String>>,

    /// Ignore files and directories matching glob patterns
    #[arg(
        long,
        help = "Ignore files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --ignore \"node_modules/**\" --ignore \"target/**\""
    )]
    pub ignore: Option<Vec<String>>,

    /// GitHub repository URL to analyze (e.g., <https://github.com/owner/repo>)
    // `validate()` requires an `https://github.com/` or `http://github.com/` prefix
    // and rejects combining this with positional paths.
    #[arg(long, help = "Process a GitHub repository")]
    pub remote: Option<String>,

    /// Read prompt from stdin
    // The flag is `--stdin`; only the field is named `read_stdin`.
    #[arg(long = "stdin", help = "Read prompt from standard input")]
    pub read_stdin: bool,

    /// The path to the output Markdown file. If used, won't call the LLM CLI
    // `validate()` rejects this together with a prompt or with `copy`.
    #[arg(short = 'o', long)]
    pub output_file: Option<PathBuf>,

    /// Maximum number of tokens for the generated codebase context
    // When set, this wins over every config-file or built-in limit
    // (see `get_effective_max_tokens()`).
    #[arg(long)]
    pub max_tokens: Option<usize>,

    /// LLM CLI tool to use for processing
    // The "gemini" default mirrors the `#[default]` variant of `LlmTool`.
    #[arg(short = 't', long = "tool", default_value = "gemini")]
    pub llm_tool: LlmTool,

    /// Suppress all output except for errors and the final LLM response
    // `validate()` rejects this together with a non-zero `verbose`.
    #[arg(short = 'q', long)]
    pub quiet: bool,

    /// Enable verbose logging (use -vv for trace level)
    // Count action: the value is the number of times the flag was given.
    #[arg(short = 'v', long, action = clap::ArgAction::Count)]
    pub verbose: u8,

    /// Log output format
    // The "plain" default mirrors the `#[default]` variant of `LogFormat`.
    #[arg(long = "log-format", value_enum, default_value = "plain")]
    pub log_format: LogFormat,

    /// Path to configuration file
    // When absent, `load_from_file()` falls back to `ConfigFile::load_default()`.
    #[arg(short = 'c', long)]
    pub config: Option<PathBuf>,

    /// Show progress indicators during processing
    #[arg(long)]
    pub progress: bool,

    /// Copy output to system clipboard instead of stdout
    // `validate()` rejects this together with `output_file`.
    #[arg(short = 'C', long)]
    pub copy: bool,

    /// Enable enhanced context with file metadata
    #[arg(long = "enhanced-context")]
    pub enhanced_context: bool,

    /// Enable import tracing for included files
    #[arg(long, help = "Include files that import the specified modules")]
    pub trace_imports: bool,

    /// Include files that call functions from specified modules
    #[arg(long, help = "Include files containing callers of specified functions")]
    pub include_callers: bool,

    /// Include type definitions used by specified files
    #[arg(long, help = "Include type definitions and interfaces")]
    pub include_types: bool,

    /// Maximum depth for semantic dependency traversal
    // Keep the "5" here in sync with `semantic_depth` in `impl Default for Config`.
    #[arg(
        long,
        default_value = "5",
        help = "Depth limit for dependency traversal"
    )]
    pub semantic_depth: usize,

    /// Custom priority rules loaded from config file (not a CLI argument)
    // Filled in by `load_from_file()` from the config file's `priorities`.
    #[clap(skip)]
    pub custom_priorities: Vec<crate::config::Priority>,

    /// Token limits loaded from config file (not a CLI argument)
    // Filled in by `load_from_file()` from the config file's `tokens`.
    #[clap(skip)]
    pub config_token_limits: Option<crate::config::TokenLimits>,

    /// Maximum tokens from config defaults (not a CLI argument)
    // NOTE(review): not assigned anywhere in this file; presumably set by
    // `ConfigFile::apply_to_cli_config` — confirm.
    #[clap(skip)]
    pub config_defaults_max_tokens: Option<usize>,
}
247
248impl Default for Config {
249    fn default() -> Self {
250        Self {
251            prompt: None,
252            paths: None,
253            include: None,
254            ignore: None,
255            remote: None,
256            read_stdin: false,
257            output_file: None,
258            max_tokens: None,
259            llm_tool: LlmTool::default(),
260            quiet: false,
261            verbose: 0,
262            log_format: LogFormat::default(),
263            config: None,
264            progress: false,
265            copy: false,
266            enhanced_context: false,
267            trace_imports: false,
268            include_callers: false,
269            include_types: false,
270            semantic_depth: 5,
271            custom_priorities: vec![],
272            config_token_limits: None,
273            config_defaults_max_tokens: None,
274        }
275    }
276}
277
278impl Config {
279    /// Validate the configuration
280    pub fn validate(&self) -> Result<(), crate::utils::error::ContextCreatorError> {
281        use crate::utils::error::ContextCreatorError;
282
283        // Validate that at least one input source is provided
284        let has_input_source = self.get_prompt().is_some()
285            || self.paths.is_some()
286            || self.include.is_some()
287            || self.remote.is_some()
288            || self.read_stdin;
289
290        if !has_input_source {
291            return Err(ContextCreatorError::InvalidConfiguration(
292                "At least one input source must be provided: --prompt, paths, --include, --remote, or --stdin".to_string(),
293            ));
294        }
295
296        // Validate verbose and quiet mutual exclusion
297        if self.verbose > 0 && self.quiet {
298            return Err(ContextCreatorError::InvalidConfiguration(
299                "Cannot use both --verbose (-v) and --quiet (-q) flags together".to_string(),
300            ));
301        }
302
303        // Note: Removed overly restrictive validation rules per issue #34
304        // Now allowing flexible combinations like:
305        // - --prompt with paths (--prompt "text" src/)
306        // - --prompt with --remote (--prompt "text" --remote url)
307        // - --stdin with paths (echo "prompt" | context-creator --stdin src/)
308        // - --include with --remote (--include "**/*.rs" --remote url)
309        // - --include with --stdin (--stdin --include "**/*.rs")
310        //
311        // The only remaining restrictions are for legitimate conflicts:
312        // - --prompt with --output-file (can't send to LLM and write to file)
313        // - --copy with --output-file (can't copy to clipboard and write to file)
314
315        // Validate repo URL if provided
316        if let Some(repo_url) = &self.remote {
317            if !repo_url.starts_with("https://github.com/")
318                && !repo_url.starts_with("http://github.com/")
319            {
320                return Err(ContextCreatorError::InvalidConfiguration(
321                    "Repository URL must be a GitHub URL (https://github.com/owner/repo)"
322                        .to_string(),
323                ));
324            }
325        } else {
326            // Only validate paths if repo is not provided
327            let paths = self.get_directories();
328            for path in &paths {
329                if !path.exists() {
330                    return Err(ContextCreatorError::InvalidPath(format!(
331                        "Path does not exist: {}",
332                        path.display()
333                    )));
334                }
335
336                // Allow both files and directories
337                if !path.is_dir() && !path.is_file() {
338                    return Err(ContextCreatorError::InvalidPath(format!(
339                        "Path is neither a file nor a directory: {}",
340                        path.display()
341                    )));
342                }
343            }
344        }
345
346        // Note: Pattern validation is handled by OverrideBuilder in walker.rs
347        // which provides better security and ReDoS protection
348
349        // Validate output file parent directory exists if specified
350        if let Some(output) = &self.output_file {
351            if let Some(parent) = output.parent() {
352                // Handle empty parent (current directory) and check if parent exists
353                if !parent.as_os_str().is_empty() && !parent.exists() {
354                    return Err(ContextCreatorError::InvalidPath(format!(
355                        "Output directory does not exist: {}",
356                        parent.display()
357                    )));
358                }
359            }
360        }
361
362        // Validate mutually exclusive options
363        if self.output_file.is_some() && self.get_prompt().is_some() {
364            return Err(ContextCreatorError::InvalidConfiguration(
365                "Cannot specify both --output and a prompt".to_string(),
366            ));
367        }
368
369        // Validate copy and output mutual exclusivity
370        if self.copy && self.output_file.is_some() {
371            return Err(ContextCreatorError::InvalidConfiguration(
372                "Cannot specify both --copy and --output".to_string(),
373            ));
374        }
375
376        // Validate repo and paths mutual exclusivity
377        // When --remote is specified, any positional paths are silently ignored in run()
378        // This prevents user confusion by failing early with a clear error message
379        if self.remote.is_some() && self.paths.is_some() {
380            return Err(ContextCreatorError::InvalidConfiguration(
381                "Cannot specify both --remote and local paths. Use --remote to analyze a remote repository, or provide local paths to analyze local directories.".to_string(),
382            ));
383        }
384
385        Ok(())
386    }
387
388    /// Load configuration from file if specified
389    pub fn load_from_file(&mut self) -> Result<(), crate::utils::error::ContextCreatorError> {
390        use crate::config::ConfigFile;
391
392        let config_file = if let Some(ref config_path) = self.config {
393            // Load from specified config file
394            Some(ConfigFile::load_from_file(config_path)?)
395        } else {
396            // Try to load from default locations
397            ConfigFile::load_default()?
398        };
399
400        if let Some(config_file) = config_file {
401            // Store custom priorities for the walker
402            self.custom_priorities = config_file.priorities.clone();
403
404            // Store token limits for token resolution
405            self.config_token_limits = Some(config_file.tokens.clone());
406
407            config_file.apply_to_cli_config(self);
408
409            if self.verbose > 0 {
410                if let Some(ref config_path) = self.config {
411                    debug!("Loaded configuration from: {}", config_path.display());
412                } else {
413                    debug!("Loaded configuration from default location");
414                }
415            }
416        }
417
418        Ok(())
419    }
420
421    /// Get the prompt from the explicit prompt flag
422    pub fn get_prompt(&self) -> Option<String> {
423        self.prompt
424            .as_ref()
425            .filter(|s| !s.trim().is_empty())
426            .cloned()
427    }
428
429    /// Get all directories from paths argument
430    /// When using --include patterns, this returns the default directory (current dir)
431    /// unless explicit paths are also provided (flexible combinations)
432    pub fn get_directories(&self) -> Vec<PathBuf> {
433        // If explicit paths are provided, use them
434        if let Some(paths) = &self.paths {
435            paths.clone()
436        } else if self.include.is_some() {
437            // When using include patterns without explicit paths, use current directory as base
438            vec![PathBuf::from(".")]
439        } else {
440            // Default to current directory
441            vec![PathBuf::from(".")]
442        }
443    }
444
445    /// Get include patterns if specified
446    pub fn get_include_patterns(&self) -> Vec<String> {
447        self.include.as_ref().cloned().unwrap_or_default()
448    }
449
450    /// Get ignore patterns if specified
451    pub fn get_ignore_patterns(&self) -> Vec<String> {
452        self.ignore.as_ref().cloned().unwrap_or_default()
453    }
454
455    /// Get effective max tokens with precedence: explicit CLI > token limits (if prompt) > config defaults > hard-coded defaults (if prompt) > None
456    pub fn get_effective_max_tokens(&self) -> Option<usize> {
457        // 1. Explicit CLI value always takes precedence
458        if let Some(explicit_tokens) = self.max_tokens {
459            return Some(explicit_tokens);
460        }
461
462        // 2. If using prompt, check token limits from config first
463        if let Some(_prompt) = self.get_prompt() {
464            // Check if we have config token limits for this tool
465            if let Some(token_limits) = &self.config_token_limits {
466                let config_limit = match self.llm_tool {
467                    LlmTool::Gemini => token_limits.gemini,
468                    LlmTool::Codex => token_limits.codex,
469                };
470
471                if let Some(limit) = config_limit {
472                    return Some(limit);
473                }
474            }
475
476            // 3. Fall back to config defaults if available
477            if let Some(defaults_tokens) = self.config_defaults_max_tokens {
478                return Some(defaults_tokens);
479            }
480
481            // 4. Fall back to hard-coded defaults for prompts
482            return Some(self.llm_tool.default_max_tokens());
483        }
484
485        // 5. For non-prompt usage, check config defaults
486        if let Some(defaults_tokens) = self.config_defaults_max_tokens {
487            return Some(defaults_tokens);
488        }
489
490        // 6. No automatic token limits for non-prompt usage
491        None
492    }
493
494    /// Get effective context tokens with prompt reservation
495    /// This accounts for prompt tokens when calculating available space for codebase context
496    pub fn get_effective_context_tokens(&self) -> Option<usize> {
497        if let Some(max_tokens) = self.get_effective_max_tokens() {
498            if let Some(prompt) = self.get_prompt() {
499                // Create token counter to measure prompt
500                if let Ok(counter) = crate::core::token::TokenCounter::new() {
501                    if let Ok(prompt_tokens) = counter.count_tokens(&prompt) {
502                        // Reserve space for prompt + safety buffer for response
503                        let safety_buffer = 1000; // Reserve for LLM response
504                        let reserved = prompt_tokens + safety_buffer;
505                        let available = max_tokens.saturating_sub(reserved);
506                        return Some(available);
507                    }
508                }
509                // Fallback: rough estimation if tiktoken fails
510                let estimated_prompt_tokens = prompt.len().div_ceil(4); // ~4 chars per token
511                let safety_buffer = 1000;
512                let reserved = estimated_prompt_tokens + safety_buffer;
513                let available = max_tokens.saturating_sub(reserved);
514                Some(available)
515            } else {
516                // No prompt, use full token budget
517                Some(max_tokens)
518            }
519        } else {
520            None
521        }
522    }
523
524    /// Check if we should read from stdin
525    pub fn should_read_stdin(&self) -> bool {
526        use std::io::IsTerminal;
527
528        // Explicitly requested stdin
529        if self.read_stdin {
530            return true;
531        }
532
533        // If stdin is not a terminal (i.e., it's piped) and no prompt is provided
534        if !std::io::stdin().is_terminal() && self.get_prompt().is_none() {
535            return true;
536        }
537
538        false
539    }
540}
541
542#[cfg(test)]
543mod tests {
544    use super::*;
545    use std::fs;
546    use tempfile::TempDir;
547
548    impl Config {
549        /// Helper function for creating Config instances in tests
550        #[allow(dead_code)]
551        fn new_for_test(paths: Option<Vec<PathBuf>>) -> Self {
552            Self {
553                paths,
554                quiet: true, // Good default for tests
555                ..Self::default()
556            }
557        }
558
559        /// Helper function for creating Config instances with include patterns in tests
560        #[allow(dead_code)]
561        fn new_for_test_with_include(include: Option<Vec<String>>) -> Self {
562            Self {
563                include,
564                quiet: true, // Good default for tests
565                ..Self::default()
566            }
567        }
568    }
569
570    #[test]
571    fn test_config_validation_valid_directory() {
572        let temp_dir = TempDir::new().unwrap();
573        let config = Config {
574            paths: Some(vec![temp_dir.path().to_path_buf()]),
575            ..Default::default()
576        };
577
578        assert!(config.validate().is_ok());
579    }
580
581    #[test]
582    fn test_config_validation_invalid_directory() {
583        let config = Config {
584            prompt: None,
585            paths: Some(vec![PathBuf::from("/nonexistent/directory")]),
586            include: None,
587            ignore: None,
588            remote: None,
589            read_stdin: false,
590            output_file: None,
591            max_tokens: None,
592            llm_tool: LlmTool::default(),
593            quiet: false,
594            verbose: 0,
595            log_format: LogFormat::default(),
596            config: None,
597            progress: false,
598            copy: false,
599            enhanced_context: false,
600            trace_imports: false,
601            include_callers: false,
602            include_types: false,
603            semantic_depth: 5,
604            custom_priorities: vec![],
605            config_token_limits: None,
606            config_defaults_max_tokens: None,
607        };
608
609        assert!(config.validate().is_err());
610    }
611
612    #[test]
613    fn test_config_validation_file_as_directory() {
614        let temp_dir = TempDir::new().unwrap();
615        let file_path = temp_dir.path().join("file.txt");
616        fs::write(&file_path, "test").unwrap();
617
618        let config = Config {
619            prompt: None,
620            paths: Some(vec![file_path]),
621            include: None,
622            ignore: None,
623            remote: None,
624            read_stdin: false,
625            output_file: None,
626            max_tokens: None,
627            llm_tool: LlmTool::default(),
628            quiet: false,
629            verbose: 0,
630            log_format: LogFormat::default(),
631            config: None,
632            progress: false,
633            copy: false,
634            enhanced_context: false,
635            trace_imports: false,
636            include_callers: false,
637            include_types: false,
638            semantic_depth: 5,
639            custom_priorities: vec![],
640            config_token_limits: None,
641            config_defaults_max_tokens: None,
642        };
643
644        assert!(config.validate().is_err());
645    }
646
647    #[test]
648    fn test_config_validation_invalid_output_directory() {
649        let temp_dir = TempDir::new().unwrap();
650        let config = Config {
651            prompt: None,
652            paths: Some(vec![temp_dir.path().to_path_buf()]),
653            include: None,
654            ignore: None,
655            remote: None,
656            read_stdin: false,
657            output_file: Some(PathBuf::from("/nonexistent/directory/output.md")),
658            max_tokens: None,
659            llm_tool: LlmTool::default(),
660            quiet: false,
661            verbose: 0,
662            log_format: LogFormat::default(),
663            config: None,
664            progress: false,
665            copy: false,
666            enhanced_context: false,
667            trace_imports: false,
668            include_callers: false,
669            include_types: false,
670            semantic_depth: 5,
671            custom_priorities: vec![],
672            config_token_limits: None,
673            config_defaults_max_tokens: None,
674        };
675
676        assert!(config.validate().is_err());
677    }
678
679    #[test]
680    fn test_config_validation_mutually_exclusive_options() {
681        let temp_dir = TempDir::new().unwrap();
682        let config = Config {
683            prompt: Some("test prompt".to_string()),
684            paths: Some(vec![temp_dir.path().to_path_buf()]),
685            include: None,
686            ignore: None,
687            remote: None,
688            read_stdin: false,
689            output_file: Some(temp_dir.path().join("output.md")),
690            max_tokens: None,
691            llm_tool: LlmTool::default(),
692            quiet: false,
693            verbose: 0,
694            log_format: LogFormat::default(),
695            config: None,
696            progress: false,
697            copy: false,
698            enhanced_context: false,
699            trace_imports: false,
700            include_callers: false,
701            include_types: false,
702            semantic_depth: 5,
703            custom_priorities: vec![],
704            config_token_limits: None,
705            config_defaults_max_tokens: None,
706        };
707
708        assert!(config.validate().is_err());
709    }
710
711    #[test]
712    fn test_llm_tool_enum_values() {
713        assert_eq!(LlmTool::Gemini.command(), "gemini");
714        assert_eq!(LlmTool::Codex.command(), "codex");
715
716        assert!(LlmTool::Gemini
717            .install_instructions()
718            .contains("pip install"));
719        assert!(LlmTool::Codex.install_instructions().contains("github.com"));
720
721        assert_eq!(LlmTool::default(), LlmTool::Gemini);
722    }
723
724    #[test]
725    fn test_llm_tool_default_max_tokens() {
726        assert_eq!(LlmTool::Gemini.default_max_tokens(), 1_000_000);
727        assert_eq!(LlmTool::Codex.default_max_tokens(), 1_000_000);
728    }
729
730    #[test]
731    fn test_config_get_effective_max_tokens_with_explicit() {
732        let config = Config {
733            prompt: Some("test prompt".to_string()),
734            max_tokens: Some(500_000),
735            llm_tool: LlmTool::Gemini,
736            ..Config::new_for_test(None)
737        };
738        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
739    }
740
741    #[test]
742    fn test_config_get_effective_max_tokens_with_prompt_default() {
743        let config = Config {
744            prompt: Some("test prompt".to_string()),
745            max_tokens: None,
746            llm_tool: LlmTool::Gemini,
747            ..Config::new_for_test(None)
748        };
749        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
750    }
751
752    #[test]
753    fn test_config_get_effective_max_tokens_no_prompt() {
754        let config = Config {
755            prompt: None,
756            max_tokens: None,
757            llm_tool: LlmTool::Gemini,
758            ..Config::new_for_test(None)
759        };
760        assert_eq!(config.get_effective_max_tokens(), None);
761    }
762
763    #[test]
764    fn test_config_get_effective_max_tokens_with_config_gemini() {
765        use crate::config::TokenLimits;
766
767        let config = Config {
768            prompt: Some("test prompt".to_string()),
769            max_tokens: None,
770            llm_tool: LlmTool::Gemini,
771            config_token_limits: Some(TokenLimits {
772                gemini: Some(2_500_000),
773                codex: Some(1_800_000),
774            }),
775            ..Config::new_for_test(None)
776        };
777        assert_eq!(config.get_effective_max_tokens(), Some(2_500_000));
778    }
779
780    #[test]
781    fn test_config_get_effective_max_tokens_with_config_codex() {
782        use crate::config::TokenLimits;
783
784        let config = Config {
785            prompt: Some("test prompt".to_string()),
786            max_tokens: None,
787            llm_tool: LlmTool::Codex,
788            config_token_limits: Some(TokenLimits {
789                gemini: Some(2_500_000),
790                codex: Some(1_800_000),
791            }),
792            ..Config::new_for_test(None)
793        };
794        assert_eq!(config.get_effective_max_tokens(), Some(1_800_000));
795    }
796
797    #[test]
798    fn test_config_get_effective_max_tokens_explicit_overrides_config() {
799        use crate::config::TokenLimits;
800
801        let config = Config {
802            prompt: Some("test prompt".to_string()),
803            max_tokens: Some(500_000), // Explicit value should override config
804            llm_tool: LlmTool::Gemini,
805            config_token_limits: Some(TokenLimits {
806                gemini: Some(2_500_000),
807                codex: Some(1_800_000),
808            }),
809            ..Config::new_for_test(None)
810        };
811        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
812    }
813
814    #[test]
815    fn test_config_get_effective_max_tokens_config_partial_gemini() {
816        use crate::config::TokenLimits;
817
818        let config = Config {
819            prompt: Some("test prompt".to_string()),
820            max_tokens: None,
821            llm_tool: LlmTool::Gemini,
822            config_token_limits: Some(TokenLimits {
823                gemini: Some(3_000_000),
824                codex: None, // Codex not configured
825            }),
826            ..Config::new_for_test(None)
827        };
828        assert_eq!(config.get_effective_max_tokens(), Some(3_000_000));
829    }
830
831    #[test]
832    fn test_config_get_effective_max_tokens_config_partial_codex() {
833        use crate::config::TokenLimits;
834
835        let config = Config {
836            prompt: Some("test prompt".to_string()),
837            max_tokens: None,
838            llm_tool: LlmTool::Codex,
839            config_token_limits: Some(TokenLimits {
840                gemini: None, // Gemini not configured
841                codex: Some(1_200_000),
842            }),
843            ..Config::new_for_test(None)
844        };
845        assert_eq!(config.get_effective_max_tokens(), Some(1_200_000));
846    }
847
848    #[test]
849    fn test_config_get_effective_max_tokens_config_fallback_to_default() {
850        use crate::config::TokenLimits;
851
852        let config = Config {
853            prompt: Some("test prompt".to_string()),
854            max_tokens: None,
855            llm_tool: LlmTool::Gemini,
856            config_token_limits: Some(TokenLimits {
857                gemini: None, // No limit configured for Gemini
858                codex: Some(1_800_000),
859            }),
860            ..Config::new_for_test(None)
861        };
862        // Should fall back to hard-coded default
863        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
864    }
865
866    #[test]
867    fn test_llm_tool_default_max_tokens_with_config() {
868        use crate::config::TokenLimits;
869
870        let token_limits = TokenLimits {
871            gemini: Some(2_500_000),
872            codex: Some(1_800_000),
873        };
874
875        assert_eq!(
876            LlmTool::Gemini.default_max_tokens_with_config(Some(&token_limits)),
877            2_500_000
878        );
879        assert_eq!(
880            LlmTool::Codex.default_max_tokens_with_config(Some(&token_limits)),
881            1_800_000
882        );
883    }
884
885    #[test]
886    fn test_llm_tool_default_max_tokens_with_config_partial() {
887        use crate::config::TokenLimits;
888
889        let token_limits = TokenLimits {
890            gemini: Some(3_000_000),
891            codex: None, // Codex not configured
892        };
893
894        assert_eq!(
895            LlmTool::Gemini.default_max_tokens_with_config(Some(&token_limits)),
896            3_000_000
897        );
898        // Should fall back to hard-coded default
899        assert_eq!(
900            LlmTool::Codex.default_max_tokens_with_config(Some(&token_limits)),
901            1_000_000
902        );
903    }
904
905    #[test]
906    fn test_llm_tool_default_max_tokens_with_no_config() {
907        assert_eq!(
908            LlmTool::Gemini.default_max_tokens_with_config(None),
909            1_000_000
910        );
911        assert_eq!(
912            LlmTool::Codex.default_max_tokens_with_config(None),
913            1_000_000
914        );
915    }
916
917    #[test]
918    fn test_get_effective_context_tokens_with_prompt() {
919        let config = Config {
920            prompt: Some("This is a test prompt".to_string()),
921            max_tokens: Some(10000),
922            llm_tool: LlmTool::Gemini,
923            ..Config::new_for_test(None)
924        };
925
926        let context_tokens = config.get_effective_context_tokens().unwrap();
927        // Should be less than max_tokens due to prompt + safety buffer reservation
928        assert!(context_tokens < 10000);
929        // Should be at least max_tokens - 1000 (safety buffer) - prompt tokens
930        assert!(context_tokens > 8000); // Conservative estimate
931    }
932
933    #[test]
934    fn test_get_effective_context_tokens_no_prompt() {
935        let config = Config {
936            prompt: None,
937            max_tokens: Some(10000),
938            llm_tool: LlmTool::Gemini,
939            ..Config::new_for_test(None)
940        };
941
942        // Without prompt, should use full token budget
943        assert_eq!(config.get_effective_context_tokens(), Some(10000));
944    }
945
946    #[test]
947    fn test_get_effective_context_tokens_no_limit() {
948        let config = Config {
949            prompt: None, // No prompt means no auto-limits
950            max_tokens: None,
951            llm_tool: LlmTool::Gemini,
952            ..Config::new_for_test(None)
953        };
954
955        // No max tokens configured and no prompt, should return None
956        assert_eq!(config.get_effective_context_tokens(), None);
957    }
958
959    #[test]
960    fn test_get_effective_context_tokens_with_config_limits() {
961        use crate::config::TokenLimits;
962
963        let config = Config {
964            prompt: Some("This is a longer test prompt for token counting".to_string()),
965            max_tokens: None, // Use config limits instead
966            llm_tool: LlmTool::Gemini,
967            config_token_limits: Some(TokenLimits {
968                gemini: Some(50000),
969                codex: Some(40000),
970            }),
971            ..Config::new_for_test(None)
972        };
973
974        let context_tokens = config.get_effective_context_tokens().unwrap();
975        // Should be less than config limit due to prompt reservation
976        assert!(context_tokens < 50000);
977        assert!(context_tokens > 45000); // Should be most of the budget
978    }
979
980    #[test]
981    fn test_config_validation_output_file_in_current_dir() {
982        let temp_dir = TempDir::new().unwrap();
983        let config = Config {
984            prompt: None,
985            paths: Some(vec![temp_dir.path().to_path_buf()]),
986            include: None,
987            ignore: None,
988            remote: None,
989            read_stdin: false,
990            output_file: Some(PathBuf::from("output.md")),
991            max_tokens: None,
992            llm_tool: LlmTool::default(),
993            quiet: false,
994            verbose: 0,
995            log_format: LogFormat::default(),
996            config: None,
997            progress: false,
998            copy: false,
999            enhanced_context: false,
1000            trace_imports: false,
1001            include_callers: false,
1002            include_types: false,
1003            semantic_depth: 5,
1004            custom_priorities: vec![],
1005            config_token_limits: None,
1006            config_defaults_max_tokens: None,
1007        };
1008
1009        // Should not error for files in current directory
1010        assert!(config.validate().is_ok());
1011    }
1012
1013    #[test]
1014    fn test_config_load_from_file_no_config() {
1015        let temp_dir = TempDir::new().unwrap();
1016        let mut config = Config {
1017            prompt: None,
1018            paths: Some(vec![temp_dir.path().to_path_buf()]),
1019            include: None,
1020            ignore: None,
1021            remote: None,
1022            read_stdin: false,
1023            output_file: None,
1024            max_tokens: None,
1025            llm_tool: LlmTool::default(),
1026            quiet: false,
1027            verbose: 0,
1028            log_format: LogFormat::default(),
1029            config: None,
1030            progress: false,
1031            copy: false,
1032            enhanced_context: false,
1033            trace_imports: false,
1034            include_callers: false,
1035            include_types: false,
1036            semantic_depth: 5,
1037            custom_priorities: vec![],
1038            config_token_limits: None,
1039            config_defaults_max_tokens: None,
1040        };
1041
1042        // Should not error when no config file is found
1043        assert!(config.load_from_file().is_ok());
1044    }
1045
1046    #[test]
1047    fn test_parse_directories() {
1048        use clap::Parser;
1049
1050        // Test single directory
1051        let args = vec!["context-creator", "/path/one"];
1052        let config = Config::parse_from(args);
1053        assert_eq!(config.paths.as_ref().unwrap().len(), 1);
1054        assert_eq!(
1055            config.paths.as_ref().unwrap()[0],
1056            PathBuf::from("/path/one")
1057        );
1058    }
1059
1060    #[test]
1061    fn test_parse_multiple_directories() {
1062        use clap::Parser;
1063
1064        // Test multiple directories
1065        let args = vec!["context-creator", "/path/one", "/path/two", "/path/three"];
1066        let config = Config::parse_from(args);
1067        assert_eq!(config.paths.as_ref().unwrap().len(), 3);
1068        assert_eq!(
1069            config.paths.as_ref().unwrap()[0],
1070            PathBuf::from("/path/one")
1071        );
1072        assert_eq!(
1073            config.paths.as_ref().unwrap()[1],
1074            PathBuf::from("/path/two")
1075        );
1076        assert_eq!(
1077            config.paths.as_ref().unwrap()[2],
1078            PathBuf::from("/path/three")
1079        );
1080
1081        // Test with explicit prompt
1082        let args = vec!["context-creator", "--prompt", "Find duplicated patterns"];
1083        let config = Config::parse_from(args);
1084        assert_eq!(config.prompt, Some("Find duplicated patterns".to_string()));
1085    }
1086
1087    #[test]
1088    fn test_validate_multiple_directories() {
1089        let temp_dir = TempDir::new().unwrap();
1090        let dir1 = temp_dir.path().join("dir1");
1091        let dir2 = temp_dir.path().join("dir2");
1092        fs::create_dir(&dir1).unwrap();
1093        fs::create_dir(&dir2).unwrap();
1094
1095        // All directories exist - should succeed
1096        let config = Config {
1097            prompt: None,
1098            paths: Some(vec![dir1.clone(), dir2.clone()]),
1099            include: None,
1100            ignore: None,
1101            remote: None,
1102            read_stdin: false,
1103            output_file: None,
1104            max_tokens: None,
1105            llm_tool: LlmTool::default(),
1106            quiet: false,
1107            verbose: 0,
1108            log_format: LogFormat::default(),
1109            config: None,
1110            progress: false,
1111            copy: false,
1112            enhanced_context: false,
1113            trace_imports: false,
1114            include_callers: false,
1115            include_types: false,
1116            semantic_depth: 5,
1117            custom_priorities: vec![],
1118            config_token_limits: None,
1119            config_defaults_max_tokens: None,
1120        };
1121        assert!(config.validate().is_ok());
1122
1123        // One directory doesn't exist - should fail
1124        let config = Config {
1125            prompt: None,
1126            paths: Some(vec![dir1, PathBuf::from("/nonexistent/dir")]),
1127            include: None,
1128            ignore: None,
1129            remote: None,
1130            read_stdin: false,
1131            output_file: None,
1132            max_tokens: None,
1133            llm_tool: LlmTool::default(),
1134            quiet: false,
1135            verbose: 0,
1136            log_format: LogFormat::default(),
1137            config: None,
1138            progress: false,
1139            copy: false,
1140            enhanced_context: false,
1141            trace_imports: false,
1142            include_callers: false,
1143            include_types: false,
1144            semantic_depth: 5,
1145            custom_priorities: vec![],
1146            config_token_limits: None,
1147            config_defaults_max_tokens: None,
1148        };
1149        assert!(config.validate().is_err());
1150    }
1151
1152    #[test]
1153    fn test_validate_files_as_directories() {
1154        let temp_dir = TempDir::new().unwrap();
1155        let dir1 = temp_dir.path().join("dir1");
1156        let file1 = temp_dir.path().join("file.txt");
1157        fs::create_dir(&dir1).unwrap();
1158        fs::write(&file1, "test content").unwrap();
1159
1160        // Mix of directory and file - should fail
1161        let config = Config {
1162            prompt: None,
1163            paths: Some(vec![dir1, file1]),
1164            include: None,
1165            ignore: None,
1166            remote: None,
1167            read_stdin: false,
1168            output_file: None,
1169            max_tokens: None,
1170            llm_tool: LlmTool::default(),
1171            quiet: false,
1172            verbose: 0,
1173            log_format: LogFormat::default(),
1174            config: None,
1175            progress: false,
1176            copy: false,
1177            enhanced_context: false,
1178            trace_imports: false,
1179            include_callers: false,
1180            include_types: false,
1181            semantic_depth: 5,
1182            custom_priorities: vec![],
1183            config_token_limits: None,
1184            config_defaults_max_tokens: None,
1185        };
1186        assert!(config.validate().is_err());
1187    }
1188}
context_creator/cli.rs

context_creator/
cli.rs