context_creator/
cli.rs

1//! Command-line interface configuration and parsing
2
3use clap::{Parser, ValueEnum};
4use std::path::PathBuf;
5
/// Text appended to the `--help` output: explains how custom priority rules
/// are matched and lists common invocations.
///
/// Every usage example here should be a command line the parser accepts. In
/// particular `--stdin` must not be shown together with positional paths,
/// because both belong to the `exclusive_inputs` argument group on `Config`,
/// which allows at most one of its members.
const AFTER_HELP_MSG: &str = "\
CUSTOM PRIORITY RULES:
  Custom priority rules are processed on a 'first-match-wins' basis. Rules are 
  evaluated in the order they are defined in your .context-creator.toml configuration 
  file. The first rule that matches a given file will be used, and all subsequent 
  rules will be ignored for that file.

  Example configuration:
    [[priorities]]
    pattern = \"src/**/*.rs\"
    weight = 10.0
    
    [[priorities]]  
    pattern = \"tests/*\"
    weight = -2.0

USAGE EXAMPLES:
  # Process current directory with a prompt
  context-creator --prompt \"Analyze this code\"
  
  # Process specific directories (positional arguments)
  context-creator src/ tests/ docs/
  
  # Process specific directories (explicit include flags)
  context-creator --include src/ --include tests/ --include docs/
  
  # Process files matching glob patterns (QUOTE patterns to prevent shell expansion)
  context-creator --include \"**/*.py\" --include \"src/**/*.{rs,toml}\"
  
  # Process specific file types across all directories
  context-creator --include \"**/*repository*.py\" --include \"**/test[0-9].py\"
  
  # Combine prompt with include patterns for targeted analysis
  context-creator --prompt \"Review security\" --include \"src/auth/**\" --include \"src/security/**\"
  
  # Use ignore patterns to exclude unwanted files
  context-creator --include \"**/*.rs\" --ignore \"target/**\" --ignore \"**/*_test.rs\"
  
  # Combine prompt with ignore patterns
  context-creator --prompt \"Analyze core logic\" --ignore \"tests/**\" --ignore \"docs/**\"
  
  # Process a GitHub repository
  context-creator --repo https://github.com/owner/repo
  
  # Read prompt from stdin (the current directory is processed)
  echo \"Review this code\" | context-creator --stdin
";
54
/// Supported LLM CLI tools
// `ValueEnum` lets clap parse this enum from the value of the `--tool`
// argument; the `#[value(name = ...)]` attributes pin the exact strings that
// are accepted on the command line. `#[default]` makes `Gemini` the value
// returned by `LlmTool::default()`.
// NOTE: the `///` comments on the variants are used by clap as the help text
// of each possible value, so rewording them changes the `--help` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum LlmTool {
    /// Use gemini (default)
    #[value(name = "gemini")]
    #[default]
    Gemini,
    /// Use codex CLI
    #[value(name = "codex")]
    Codex,
}
66
67impl LlmTool {
68    /// Get the command name for the tool
69    pub fn command(&self) -> &'static str {
70        match self {
71            LlmTool::Gemini => "gemini",
72            LlmTool::Codex => "codex",
73        }
74    }
75
76    /// Get the installation instructions for the tool
77    pub fn install_instructions(&self) -> &'static str {
78        match self {
79            LlmTool::Gemini => "Please install gemini with: pip install gemini",
80            LlmTool::Codex => {
81                "Please install codex CLI from: https://github.com/microsoft/codex-cli"
82            }
83        }
84    }
85
86    /// Get the default maximum tokens for the tool
87    pub fn default_max_tokens(&self) -> usize {
88        match self {
89            LlmTool::Gemini => 1_000_000,
90            LlmTool::Codex => 1_000_000,
91        }
92    }
93
94    /// Get the default maximum tokens for the tool with optional config override
95    pub fn default_max_tokens_with_config(
96        &self,
97        config_token_limits: Option<&crate::config::TokenLimits>,
98    ) -> usize {
99        if let Some(token_limits) = config_token_limits {
100            match self {
101                LlmTool::Gemini => token_limits.gemini.unwrap_or(1_000_000),
102                LlmTool::Codex => token_limits.codex.unwrap_or(1_000_000),
103            }
104        } else {
105            self.default_max_tokens()
106        }
107    }
108}
109
// NOTE: this struct is a clap `Parser`. The `///` comments on its fields are
// consumed by the derive macro as argument help text (unless an explicit
// `help = ...` is given), so rewording them changes the `--help` output.
// Maintainer-only notes in this struct are therefore written as `//` comments.
/// High-performance CLI tool to convert codebases to Markdown for LLM context
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, after_help = AFTER_HELP_MSG)]
// At most one of: positional paths, `--repo`, `--stdin` (`multiple(false)`);
// none of them is mandatory at the parser level (`required(false)`).
// Further cross-option rules are enforced in `Config::validate`.
#[command(group(
    clap::ArgGroup::new("exclusive_inputs")
        .required(false)
        .args(&["paths", "repo", "read_stdin"])
        .multiple(false),
))]
pub struct Config {
    /// The prompt to send to the LLM for processing
    #[arg(short = 'p', long = "prompt", help = "Process a text prompt directly")]
    pub prompt: Option<String>,

    /// One or more directory paths to process
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    // Positional argument; rejected by clap when `--include` is also given.
    #[arg(
        value_name = "PATHS",
        help = "Process directories",
        conflicts_with = "include"
    )]
    pub paths: Option<Vec<PathBuf>>,

    /// Include files and directories matching glob patterns
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    #[arg(
        long,
        help = "Include files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --include \"*.py\" --include \"src/**/*.{rs,toml}\""
    )]
    pub include: Option<Vec<String>>,

    /// Ignore files and directories matching glob patterns
    #[arg(
        long,
        help = "Ignore files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --ignore \"node_modules/**\" --ignore \"target/**\""
    )]
    pub ignore: Option<Vec<String>>,

    /// GitHub repository URL to analyze (e.g., <https://github.com/owner/repo>)
    // The URL prefix is checked in `Config::validate`.
    #[arg(long, help = "Process a GitHub repository")]
    pub repo: Option<String>,

    /// Read prompt from stdin
    // Exposed as `--stdin`; the field is named `read_stdin`.
    #[arg(long = "stdin", help = "Read prompt from standard input")]
    pub read_stdin: bool,

    /// The path to the output Markdown file. If used, won't call the LLM CLI
    #[arg(short = 'o', long)]
    pub output_file: Option<PathBuf>,

    /// Maximum number of tokens for the generated codebase context
    // When unset, `get_effective_max_tokens` resolves a limit from the
    // config-file values and built-in defaults.
    #[arg(long)]
    pub max_tokens: Option<usize>,

    /// LLM CLI tool to use for processing
    #[arg(short = 't', long = "tool", default_value = "gemini")]
    pub llm_tool: LlmTool,

    /// Suppress all output except for errors and the final LLM response
    #[arg(short = 'q', long)]
    pub quiet: bool,

    /// Enable verbose logging
    #[arg(short = 'v', long)]
    pub verbose: bool,

    /// Path to configuration file
    // Read by `load_from_file`; when absent, default locations are tried.
    #[arg(short = 'c', long)]
    pub config: Option<PathBuf>,

    /// Show progress indicators during processing
    #[arg(long)]
    pub progress: bool,

    /// Copy output to system clipboard instead of stdout
    // `validate` rejects this together with an output file.
    #[arg(short = 'C', long)]
    pub copy: bool,

    /// Enable enhanced context with file metadata
    #[arg(long = "enhanced-context")]
    pub enhanced_context: bool,

    /// Enable import tracing for included files
    #[arg(long, help = "Include files that import the specified modules")]
    pub trace_imports: bool,

    /// Include files that call functions from specified modules
    #[arg(long, help = "Include files containing callers of specified functions")]
    pub include_callers: bool,

    /// Include type definitions used by specified files
    #[arg(long, help = "Include type definitions and interfaces")]
    pub include_types: bool,

    /// Maximum depth for semantic dependency traversal
    // The default of 3 is repeated in `impl Default for Config`; keep both in sync.
    #[arg(
        long,
        default_value = "3",
        help = "Depth limit for dependency traversal"
    )]
    pub semantic_depth: usize,

    // The three fields below are skipped by clap and start out at their
    // `Default` values after parsing. `load_from_file` fills
    // `custom_priorities` and `config_token_limits`.
    /// Custom priority rules loaded from config file (not a CLI argument)
    #[clap(skip)]
    pub custom_priorities: Vec<crate::config::Priority>,

    /// Token limits loaded from config file (not a CLI argument)
    #[clap(skip)]
    pub config_token_limits: Option<crate::config::TokenLimits>,

    /// Maximum tokens from config defaults (not a CLI argument)
    // NOTE(review): not assigned anywhere in this file; presumably set by
    // `ConfigFile::apply_to_cli_config` — confirm.
    #[clap(skip)]
    pub config_defaults_max_tokens: Option<usize>,
}
224
225impl Default for Config {
226    fn default() -> Self {
227        Self {
228            prompt: None,
229            paths: None,
230            include: None,
231            ignore: None,
232            repo: None,
233            read_stdin: false,
234            output_file: None,
235            max_tokens: None,
236            llm_tool: LlmTool::default(),
237            quiet: false,
238            verbose: false,
239            config: None,
240            progress: false,
241            copy: false,
242            enhanced_context: false,
243            trace_imports: false,
244            include_callers: false,
245            include_types: false,
246            semantic_depth: 3,
247            custom_priorities: vec![],
248            config_token_limits: None,
249            config_defaults_max_tokens: None,
250        }
251    }
252}
253
254impl Config {
255    /// Validate the configuration
256    pub fn validate(&self) -> Result<(), crate::utils::error::ContextCreatorError> {
257        use crate::utils::error::ContextCreatorError;
258
259        // Validate that at least one input source is provided
260        let has_input_source = self.get_prompt().is_some()
261            || self.paths.is_some()
262            || self.include.is_some()
263            || self.repo.is_some()
264            || self.read_stdin;
265
266        if !has_input_source {
267            return Err(ContextCreatorError::InvalidConfiguration(
268                "At least one input source must be provided: --prompt, paths, --include, --repo, or --stdin".to_string(),
269            ));
270        }
271
272        // Validate mutual exclusivity - prompt cannot be used with paths or repo
273        if self.get_prompt().is_some() && self.paths.is_some() {
274            return Err(ContextCreatorError::InvalidConfiguration(
275                "--prompt cannot be used with directory paths".to_string(),
276            ));
277        }
278
279        if self.get_prompt().is_some() && self.repo.is_some() {
280            return Err(ContextCreatorError::InvalidConfiguration(
281                "--prompt cannot be used with --repo".to_string(),
282            ));
283        }
284
285        // Validate include conflicts - include cannot be used with repo or stdin
286        if self.include.is_some() && self.repo.is_some() {
287            return Err(ContextCreatorError::InvalidConfiguration(
288                "--include cannot be used with --repo".to_string(),
289            ));
290        }
291
292        if self.include.is_some() && self.read_stdin {
293            return Err(ContextCreatorError::InvalidConfiguration(
294                "--include cannot be used with --stdin".to_string(),
295            ));
296        }
297
298        // Validate repo URL if provided
299        if let Some(repo_url) = &self.repo {
300            if !repo_url.starts_with("https://github.com/")
301                && !repo_url.starts_with("http://github.com/")
302            {
303                return Err(ContextCreatorError::InvalidConfiguration(
304                    "Repository URL must be a GitHub URL (https://github.com/owner/repo)"
305                        .to_string(),
306                ));
307            }
308        } else {
309            // Only validate directories if repo is not provided
310            let directories = self.get_directories();
311            for directory in &directories {
312                if !directory.exists() {
313                    return Err(ContextCreatorError::InvalidPath(format!(
314                        "Directory does not exist: {}",
315                        directory.display()
316                    )));
317                }
318
319                if !directory.is_dir() {
320                    return Err(ContextCreatorError::InvalidPath(format!(
321                        "Path is not a directory: {}",
322                        directory.display()
323                    )));
324                }
325            }
326        }
327
328        // Note: Pattern validation is handled by OverrideBuilder in walker.rs
329        // which provides better security and ReDoS protection
330
331        // Validate output file parent directory exists if specified
332        if let Some(output) = &self.output_file {
333            if let Some(parent) = output.parent() {
334                // Handle empty parent (current directory) and check if parent exists
335                if !parent.as_os_str().is_empty() && !parent.exists() {
336                    return Err(ContextCreatorError::InvalidPath(format!(
337                        "Output directory does not exist: {}",
338                        parent.display()
339                    )));
340                }
341            }
342        }
343
344        // Validate mutually exclusive options
345        if self.output_file.is_some() && self.get_prompt().is_some() {
346            return Err(ContextCreatorError::InvalidConfiguration(
347                "Cannot specify both --output and a prompt".to_string(),
348            ));
349        }
350
351        // Validate copy and output mutual exclusivity
352        if self.copy && self.output_file.is_some() {
353            return Err(ContextCreatorError::InvalidConfiguration(
354                "Cannot specify both --copy and --output".to_string(),
355            ));
356        }
357
358        Ok(())
359    }
360
361    /// Load configuration from file if specified
362    pub fn load_from_file(&mut self) -> Result<(), crate::utils::error::ContextCreatorError> {
363        use crate::config::ConfigFile;
364
365        let config_file = if let Some(ref config_path) = self.config {
366            // Load from specified config file
367            Some(ConfigFile::load_from_file(config_path)?)
368        } else {
369            // Try to load from default locations
370            ConfigFile::load_default()?
371        };
372
373        if let Some(config_file) = config_file {
374            // Store custom priorities for the walker
375            self.custom_priorities = config_file.priorities.clone();
376
377            // Store token limits for token resolution
378            self.config_token_limits = Some(config_file.tokens.clone());
379
380            config_file.apply_to_cli_config(self);
381
382            if self.verbose {
383                if let Some(ref config_path) = self.config {
384                    eprintln!("📄 Loaded configuration from: {}", config_path.display());
385                } else {
386                    eprintln!("📄 Loaded configuration from default location");
387                }
388            }
389        }
390
391        Ok(())
392    }
393
394    /// Get the prompt from the explicit prompt flag
395    pub fn get_prompt(&self) -> Option<String> {
396        self.prompt
397            .as_ref()
398            .filter(|s| !s.trim().is_empty())
399            .cloned()
400    }
401
402    /// Get all directories from paths argument
403    /// When using --include patterns, this returns the default directory (current dir)
404    /// since patterns are handled separately by the walker
405    pub fn get_directories(&self) -> Vec<PathBuf> {
406        if self.include.is_some() {
407            // When using include patterns, use current directory as base
408            vec![PathBuf::from(".")]
409        } else {
410            self.paths
411                .as_ref()
412                .cloned()
413                .unwrap_or_else(|| vec![PathBuf::from(".")])
414        }
415    }
416
417    /// Get include patterns if specified
418    pub fn get_include_patterns(&self) -> Vec<String> {
419        self.include.as_ref().cloned().unwrap_or_default()
420    }
421
422    /// Get ignore patterns if specified
423    pub fn get_ignore_patterns(&self) -> Vec<String> {
424        self.ignore.as_ref().cloned().unwrap_or_default()
425    }
426
427    /// Get effective max tokens with precedence: explicit CLI > token limits (if prompt) > config defaults > hard-coded defaults (if prompt) > None
428    pub fn get_effective_max_tokens(&self) -> Option<usize> {
429        // 1. Explicit CLI value always takes precedence
430        if let Some(explicit_tokens) = self.max_tokens {
431            return Some(explicit_tokens);
432        }
433
434        // 2. If using prompt, check token limits from config first
435        if let Some(_prompt) = self.get_prompt() {
436            // Check if we have config token limits for this tool
437            if let Some(token_limits) = &self.config_token_limits {
438                let config_limit = match self.llm_tool {
439                    LlmTool::Gemini => token_limits.gemini,
440                    LlmTool::Codex => token_limits.codex,
441                };
442
443                if let Some(limit) = config_limit {
444                    return Some(limit);
445                }
446            }
447
448            // 3. Fall back to config defaults if available
449            if let Some(defaults_tokens) = self.config_defaults_max_tokens {
450                return Some(defaults_tokens);
451            }
452
453            // 4. Fall back to hard-coded defaults for prompts
454            return Some(self.llm_tool.default_max_tokens());
455        }
456
457        // 5. For non-prompt usage, check config defaults
458        if let Some(defaults_tokens) = self.config_defaults_max_tokens {
459            return Some(defaults_tokens);
460        }
461
462        // 6. No automatic token limits for non-prompt usage
463        None
464    }
465
466    /// Get effective context tokens with prompt reservation
467    /// This accounts for prompt tokens when calculating available space for codebase context
468    pub fn get_effective_context_tokens(&self) -> Option<usize> {
469        if let Some(max_tokens) = self.get_effective_max_tokens() {
470            if let Some(prompt) = self.get_prompt() {
471                // Create token counter to measure prompt
472                if let Ok(counter) = crate::core::token::TokenCounter::new() {
473                    if let Ok(prompt_tokens) = counter.count_tokens(&prompt) {
474                        // Reserve space for prompt + safety buffer for response
475                        let safety_buffer = 1000; // Reserve for LLM response
476                        let reserved = prompt_tokens + safety_buffer;
477                        let available = max_tokens.saturating_sub(reserved);
478                        return Some(available);
479                    }
480                }
481                // Fallback: rough estimation if tiktoken fails
482                let estimated_prompt_tokens = prompt.len().div_ceil(4); // ~4 chars per token
483                let safety_buffer = 1000;
484                let reserved = estimated_prompt_tokens + safety_buffer;
485                let available = max_tokens.saturating_sub(reserved);
486                Some(available)
487            } else {
488                // No prompt, use full token budget
489                Some(max_tokens)
490            }
491        } else {
492            None
493        }
494    }
495
496    /// Check if we should read from stdin
497    pub fn should_read_stdin(&self) -> bool {
498        use std::io::IsTerminal;
499
500        // Explicitly requested stdin
501        if self.read_stdin {
502            return true;
503        }
504
505        // If stdin is not a terminal (i.e., it's piped) and no prompt is provided
506        if !std::io::stdin().is_terminal() && self.get_prompt().is_none() {
507            return true;
508        }
509
510        false
511    }
512}
513
514#[cfg(test)]
515mod tests {
516    use super::*;
517    use std::fs;
518    use tempfile::TempDir;
519
520    impl Config {
521        /// Helper function for creating Config instances in tests
522        #[allow(dead_code)]
523        fn new_for_test(paths: Option<Vec<PathBuf>>) -> Self {
524            Self {
525                paths,
526                quiet: true, // Good default for tests
527                ..Self::default()
528            }
529        }
530
531        /// Helper function for creating Config instances with include patterns in tests
532        #[allow(dead_code)]
533        fn new_for_test_with_include(include: Option<Vec<String>>) -> Self {
534            Self {
535                include,
536                quiet: true, // Good default for tests
537                ..Self::default()
538            }
539        }
540    }
541
542    #[test]
543    fn test_config_validation_valid_directory() {
544        let temp_dir = TempDir::new().unwrap();
545        let config = Config {
546            prompt: None,
547            paths: Some(vec![temp_dir.path().to_path_buf()]),
548            include: None,
549            ignore: None,
550            repo: None,
551            read_stdin: false,
552            output_file: None,
553            max_tokens: None,
554            llm_tool: LlmTool::default(),
555            quiet: false,
556            verbose: false,
557            config: None,
558            progress: false,
559            copy: false,
560            enhanced_context: false,
561            trace_imports: false,
562            include_callers: false,
563            include_types: false,
564            semantic_depth: 3,
565            custom_priorities: vec![],
566            config_token_limits: None,
567            config_defaults_max_tokens: None,
568        };
569
570        assert!(config.validate().is_ok());
571    }
572
573    #[test]
574    fn test_config_validation_invalid_directory() {
575        let config = Config {
576            prompt: None,
577            paths: Some(vec![PathBuf::from("/nonexistent/directory")]),
578            include: None,
579            ignore: None,
580            repo: None,
581            read_stdin: false,
582            output_file: None,
583            max_tokens: None,
584            llm_tool: LlmTool::default(),
585            quiet: false,
586            verbose: false,
587            config: None,
588            progress: false,
589            copy: false,
590            enhanced_context: false,
591            trace_imports: false,
592            include_callers: false,
593            include_types: false,
594            semantic_depth: 3,
595            custom_priorities: vec![],
596            config_token_limits: None,
597            config_defaults_max_tokens: None,
598        };
599
600        assert!(config.validate().is_err());
601    }
602
603    #[test]
604    fn test_config_validation_file_as_directory() {
605        let temp_dir = TempDir::new().unwrap();
606        let file_path = temp_dir.path().join("file.txt");
607        fs::write(&file_path, "test").unwrap();
608
609        let config = Config {
610            prompt: None,
611            paths: Some(vec![file_path]),
612            include: None,
613            ignore: None,
614            repo: None,
615            read_stdin: false,
616            output_file: None,
617            max_tokens: None,
618            llm_tool: LlmTool::default(),
619            quiet: false,
620            verbose: false,
621            config: None,
622            progress: false,
623            copy: false,
624            enhanced_context: false,
625            trace_imports: false,
626            include_callers: false,
627            include_types: false,
628            semantic_depth: 3,
629            custom_priorities: vec![],
630            config_token_limits: None,
631            config_defaults_max_tokens: None,
632        };
633
634        assert!(config.validate().is_err());
635    }
636
637    #[test]
638    fn test_config_validation_invalid_output_directory() {
639        let temp_dir = TempDir::new().unwrap();
640        let config = Config {
641            prompt: None,
642            paths: Some(vec![temp_dir.path().to_path_buf()]),
643            include: None,
644            ignore: None,
645            repo: None,
646            read_stdin: false,
647            output_file: Some(PathBuf::from("/nonexistent/directory/output.md")),
648            max_tokens: None,
649            llm_tool: LlmTool::default(),
650            quiet: false,
651            verbose: false,
652            config: None,
653            progress: false,
654            copy: false,
655            enhanced_context: false,
656            trace_imports: false,
657            include_callers: false,
658            include_types: false,
659            semantic_depth: 3,
660            custom_priorities: vec![],
661            config_token_limits: None,
662            config_defaults_max_tokens: None,
663        };
664
665        assert!(config.validate().is_err());
666    }
667
668    #[test]
669    fn test_config_validation_mutually_exclusive_options() {
670        let temp_dir = TempDir::new().unwrap();
671        let config = Config {
672            prompt: Some("test prompt".to_string()),
673            paths: Some(vec![temp_dir.path().to_path_buf()]),
674            include: None,
675            ignore: None,
676            repo: None,
677            read_stdin: false,
678            output_file: Some(temp_dir.path().join("output.md")),
679            max_tokens: None,
680            llm_tool: LlmTool::default(),
681            quiet: false,
682            verbose: false,
683            config: None,
684            progress: false,
685            copy: false,
686            enhanced_context: false,
687            trace_imports: false,
688            include_callers: false,
689            include_types: false,
690            semantic_depth: 3,
691            custom_priorities: vec![],
692            config_token_limits: None,
693            config_defaults_max_tokens: None,
694        };
695
696        assert!(config.validate().is_err());
697    }
698
699    #[test]
700    fn test_llm_tool_enum_values() {
701        assert_eq!(LlmTool::Gemini.command(), "gemini");
702        assert_eq!(LlmTool::Codex.command(), "codex");
703
704        assert!(LlmTool::Gemini
705            .install_instructions()
706            .contains("pip install"));
707        assert!(LlmTool::Codex.install_instructions().contains("github.com"));
708
709        assert_eq!(LlmTool::default(), LlmTool::Gemini);
710    }
711
712    #[test]
713    fn test_llm_tool_default_max_tokens() {
714        assert_eq!(LlmTool::Gemini.default_max_tokens(), 1_000_000);
715        assert_eq!(LlmTool::Codex.default_max_tokens(), 1_000_000);
716    }
717
718    #[test]
719    fn test_config_get_effective_max_tokens_with_explicit() {
720        let config = Config {
721            prompt: Some("test prompt".to_string()),
722            max_tokens: Some(500_000),
723            llm_tool: LlmTool::Gemini,
724            ..Config::new_for_test(None)
725        };
726        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
727    }
728
729    #[test]
730    fn test_config_get_effective_max_tokens_with_prompt_default() {
731        let config = Config {
732            prompt: Some("test prompt".to_string()),
733            max_tokens: None,
734            llm_tool: LlmTool::Gemini,
735            ..Config::new_for_test(None)
736        };
737        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
738    }
739
740    #[test]
741    fn test_config_get_effective_max_tokens_no_prompt() {
742        let config = Config {
743            prompt: None,
744            max_tokens: None,
745            llm_tool: LlmTool::Gemini,
746            ..Config::new_for_test(None)
747        };
748        assert_eq!(config.get_effective_max_tokens(), None);
749    }
750
751    #[test]
752    fn test_config_get_effective_max_tokens_with_config_gemini() {
753        use crate::config::TokenLimits;
754
755        let config = Config {
756            prompt: Some("test prompt".to_string()),
757            max_tokens: None,
758            llm_tool: LlmTool::Gemini,
759            config_token_limits: Some(TokenLimits {
760                gemini: Some(2_500_000),
761                codex: Some(1_800_000),
762            }),
763            ..Config::new_for_test(None)
764        };
765        assert_eq!(config.get_effective_max_tokens(), Some(2_500_000));
766    }
767
768    #[test]
769    fn test_config_get_effective_max_tokens_with_config_codex() {
770        use crate::config::TokenLimits;
771
772        let config = Config {
773            prompt: Some("test prompt".to_string()),
774            max_tokens: None,
775            llm_tool: LlmTool::Codex,
776            config_token_limits: Some(TokenLimits {
777                gemini: Some(2_500_000),
778                codex: Some(1_800_000),
779            }),
780            ..Config::new_for_test(None)
781        };
782        assert_eq!(config.get_effective_max_tokens(), Some(1_800_000));
783    }
784
785    #[test]
786    fn test_config_get_effective_max_tokens_explicit_overrides_config() {
787        use crate::config::TokenLimits;
788
789        let config = Config {
790            prompt: Some("test prompt".to_string()),
791            max_tokens: Some(500_000), // Explicit value should override config
792            llm_tool: LlmTool::Gemini,
793            config_token_limits: Some(TokenLimits {
794                gemini: Some(2_500_000),
795                codex: Some(1_800_000),
796            }),
797            ..Config::new_for_test(None)
798        };
799        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
800    }
801
802    #[test]
803    fn test_config_get_effective_max_tokens_config_partial_gemini() {
804        use crate::config::TokenLimits;
805
806        let config = Config {
807            prompt: Some("test prompt".to_string()),
808            max_tokens: None,
809            llm_tool: LlmTool::Gemini,
810            config_token_limits: Some(TokenLimits {
811                gemini: Some(3_000_000),
812                codex: None, // Codex not configured
813            }),
814            ..Config::new_for_test(None)
815        };
816        assert_eq!(config.get_effective_max_tokens(), Some(3_000_000));
817    }
818
819    #[test]
820    fn test_config_get_effective_max_tokens_config_partial_codex() {
821        use crate::config::TokenLimits;
822
823        let config = Config {
824            prompt: Some("test prompt".to_string()),
825            max_tokens: None,
826            llm_tool: LlmTool::Codex,
827            config_token_limits: Some(TokenLimits {
828                gemini: None, // Gemini not configured
829                codex: Some(1_200_000),
830            }),
831            ..Config::new_for_test(None)
832        };
833        assert_eq!(config.get_effective_max_tokens(), Some(1_200_000));
834    }
835
836    #[test]
837    fn test_config_get_effective_max_tokens_config_fallback_to_default() {
838        use crate::config::TokenLimits;
839
840        let config = Config {
841            prompt: Some("test prompt".to_string()),
842            max_tokens: None,
843            llm_tool: LlmTool::Gemini,
844            config_token_limits: Some(TokenLimits {
845                gemini: None, // No limit configured for Gemini
846                codex: Some(1_800_000),
847            }),
848            ..Config::new_for_test(None)
849        };
850        // Should fall back to hard-coded default
851        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
852    }
853
/// When both tools have a configured limit, each tool should report its own
/// configured value.
#[test]
fn test_llm_tool_default_max_tokens_with_config() {
    use crate::config::TokenLimits;

    let limits = TokenLimits {
        gemini: Some(2_500_000),
        codex: Some(1_800_000),
    };

    let gemini_limit = LlmTool::Gemini.default_max_tokens_with_config(Some(&limits));
    let codex_limit = LlmTool::Codex.default_max_tokens_with_config(Some(&limits));

    assert_eq!(gemini_limit, 2_500_000);
    assert_eq!(codex_limit, 1_800_000);
}
872
/// When only Gemini has a configured limit, Gemini should report it while
/// Codex should report 1_000_000.
#[test]
fn test_llm_tool_default_max_tokens_with_config_partial() {
    use crate::config::TokenLimits;

    // Codex is intentionally left without a configured limit.
    let limits = TokenLimits {
        gemini: Some(3_000_000),
        codex: None,
    };
    let resolve = |tool: LlmTool| tool.default_max_tokens_with_config(Some(&limits));

    // Configured tool: the configured value wins.
    assert_eq!(resolve(LlmTool::Gemini), 3_000_000);
    // Unconfigured tool: the default value is expected instead.
    assert_eq!(resolve(LlmTool::Codex), 1_000_000);
}
892
/// Without any config-provided limits, both tools should report 1_000_000.
#[test]
fn test_llm_tool_default_max_tokens_with_no_config() {
    for tool in [LlmTool::Gemini, LlmTool::Codex] {
        assert_eq!(tool.default_max_tokens_with_config(None), 1_000_000);
    }
}
904
/// With a prompt and an explicit 10000-token limit, the context budget should
/// be strictly below the limit but still above 8000.
#[test]
fn test_get_effective_context_tokens_with_prompt() {
    let config = Config {
        prompt: Some(String::from("This is a test prompt")),
        max_tokens: Some(10000),
        llm_tool: LlmTool::Gemini,
        ..Config::new_for_test(None)
    };

    let budget = config.get_effective_context_tokens().unwrap();

    // Part of the limit is expected to be held back when a prompt is present...
    assert!(10000 > budget);
    // ...but only a modest part of it (8000 is a conservative lower bound).
    assert!(8000 < budget);
}
920
/// Without a prompt, the whole explicit token limit should be available as
/// context budget.
#[test]
fn test_get_effective_context_tokens_no_prompt() {
    let config = Config {
        prompt: None,
        max_tokens: Some(10000),
        llm_tool: LlmTool::Gemini,
        ..Config::new_for_test(None)
    };

    // Nothing is expected to be subtracted when there is no prompt.
    let budget = config.get_effective_context_tokens();
    assert_eq!(budget, Some(10000));
}
933
/// With neither a prompt nor an explicit token limit, no context budget
/// should be reported at all.
#[test]
fn test_get_effective_context_tokens_no_limit() {
    let config = Config {
        prompt: None,
        max_tokens: None,
        llm_tool: LlmTool::Gemini,
        ..Config::new_for_test(None)
    };

    // No limit configured and no prompt: the result should be `None`.
    assert!(config.get_effective_context_tokens().is_none());
}
946
/// With a prompt and no explicit limit, the config-provided Gemini limit
/// (50000) should drive the budget: below the limit, but above 45000.
#[test]
fn test_get_effective_context_tokens_with_config_limits() {
    use crate::config::TokenLimits;

    let limits = TokenLimits {
        gemini: Some(50000),
        codex: Some(40000),
    };
    let config = Config {
        prompt: Some(String::from(
            "This is a longer test prompt for token counting",
        )),
        // Left unset so that the config-provided limits are consulted.
        max_tokens: None,
        llm_tool: LlmTool::Gemini,
        config_token_limits: Some(limits),
        ..Config::new_for_test(None)
    };

    let budget = config.get_effective_context_tokens().unwrap();

    // Strictly greater than 45000 and strictly less than 50000.
    assert!((45001..50000).contains(&budget));
}
967
/// Validation should accept an output file given as a bare relative file name
/// (no directory component).
#[test]
fn test_config_validation_output_file_in_current_dir() {
    let temp_dir = TempDir::new().unwrap();
    let scan_root = temp_dir.path().to_path_buf();
    let output_target = PathBuf::from("output.md");

    let config = Config {
        // Inputs
        prompt: None,
        paths: Some(vec![scan_root]),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        // Output and limits
        output_file: Some(output_target),
        max_tokens: None,
        llm_tool: Default::default(),
        // Reporting and behavior flags
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        // Semantic analysis options
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        // Values normally filled in from a config file
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    // A bare file name as the output target must not be rejected.
    let outcome = config.validate();
    assert!(outcome.is_ok());
}
999
/// Loading configuration should succeed when the config has no config file
/// set (`config: None`).
#[test]
fn test_config_load_from_file_no_config() {
    let temp_dir = TempDir::new().unwrap();
    let scan_root = temp_dir.path().to_path_buf();

    let mut config = Config {
        // Inputs
        prompt: None,
        paths: Some(vec![scan_root]),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        // Output and limits
        output_file: None,
        max_tokens: None,
        llm_tool: Default::default(),
        // Reporting and behavior flags
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        // Semantic analysis options
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        // Values normally filled in from a config file
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    // A missing config file is expected to be tolerated, not reported as an error.
    let outcome = config.load_from_file();
    assert!(outcome.is_ok());
}
1031
/// A single positional argument should be parsed into exactly one path.
#[test]
fn test_parse_directories() {
    use clap::Parser;

    let parsed = Config::parse_from(["context-creator", "/path/one"]);

    let paths = parsed.paths.as_ref().unwrap();
    assert_eq!(paths.len(), 1);
    assert_eq!(paths[0], PathBuf::from("/path/one"));
}
1045
/// Several positional arguments should be parsed into paths in the order
/// given; an explicit `--prompt` should be stored verbatim.
#[test]
fn test_parse_multiple_directories() {
    use clap::Parser;

    // Multiple positional directories keep their command-line order.
    let expected_paths = ["/path/one", "/path/two", "/path/three"];
    let parsed = Config::parse_from([
        "context-creator",
        "/path/one",
        "/path/two",
        "/path/three",
    ]);
    let paths = parsed.paths.as_ref().unwrap();
    assert_eq!(paths.len(), 3);
    for (actual, wanted) in paths.iter().zip(expected_paths) {
        assert_eq!(*actual, PathBuf::from(wanted));
    }

    // An explicit prompt flag is captured as the prompt text.
    let parsed = Config::parse_from(["context-creator", "--prompt", "Find duplicated patterns"]);
    assert_eq!(parsed.prompt.as_deref(), Some("Find duplicated patterns"));
}
1072
/// Validation should pass when every listed directory exists and fail as soon
/// as one of them does not.
#[test]
fn test_validate_multiple_directories() {
    let temp_dir = TempDir::new().unwrap();
    let first_dir = temp_dir.path().join("dir1");
    let second_dir = temp_dir.path().join("dir2");
    for dir in [&first_dir, &second_dir] {
        fs::create_dir(dir).unwrap();
    }

    // Builds a config that differs between the two scenarios only in `paths`.
    let config_with_paths = |paths: Vec<PathBuf>| Config {
        prompt: None,
        paths: Some(paths),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        output_file: None,
        max_tokens: None,
        llm_tool: Default::default(),
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    // Every directory exists: validation succeeds.
    let all_present = config_with_paths(vec![first_dir.clone(), second_dir]);
    assert!(all_present.validate().is_ok());

    // One directory is missing: validation fails.
    let one_missing = config_with_paths(vec![first_dir, PathBuf::from("/nonexistent/dir")]);
    assert!(one_missing.validate().is_err());
}
1135
/// Validation should fail when the path list mixes an existing directory with
/// a regular file.
#[test]
fn test_validate_files_as_directories() {
    let temp_dir = TempDir::new().unwrap();
    let real_dir = temp_dir.path().join("dir1");
    let plain_file = temp_dir.path().join("file.txt");
    fs::create_dir(&real_dir).unwrap();
    fs::write(&plain_file, "test content").unwrap();

    let mixed_paths = vec![real_dir, plain_file];
    let config = Config {
        // Inputs
        prompt: None,
        paths: Some(mixed_paths),
        include: None,
        ignore: None,
        repo: None,
        read_stdin: false,
        // Output and limits
        output_file: None,
        max_tokens: None,
        llm_tool: Default::default(),
        // Reporting and behavior flags
        quiet: false,
        verbose: false,
        config: None,
        progress: false,
        copy: false,
        // Semantic analysis options
        enhanced_context: false,
        trace_imports: false,
        include_callers: false,
        include_types: false,
        semantic_depth: 3,
        // Values normally filled in from a config file
        custom_priorities: Vec::new(),
        config_token_limits: None,
        config_defaults_max_tokens: None,
    };

    // The regular file among the paths is expected to make validation fail.
    let outcome = config.validate();
    assert!(outcome.is_err());
}
1171}
context_creator/cli.rs

context_creator/
cli.rs