context_creator/
cli.rs

1//! Command-line interface configuration and parsing
2
3use clap::{Parser, Subcommand, ValueEnum};
4use std::path::PathBuf;
5use tracing::debug;
6
/// Usage examples shown for the `examples` subcommand.
///
/// Every flag used in these examples must match an option declared on `Config`;
/// in particular, remote repositories are selected with `--remote`.
pub const USAGE_EXAMPLES: &str = "\
USAGE EXAMPLES:

Basic Usage:
  # Process current directory
  context-creator
  
  # Process specific directories
  context-creator src/ tests/ docs/
  
  # Save to file
  context-creator -o context.md

Pattern Matching:
  # Include specific file types (quote patterns to prevent shell expansion)
  context-creator --include \"**/*.py\" --include \"src/**/*.{rs,toml}\"
  
  # Exclude patterns
  context-creator --ignore \"**/*_test.py\" --ignore \"**/migrations/**\"
  
  # Combine includes and excludes
  context-creator --include \"**/*.ts\" --ignore \"node_modules/**\"

Search Command:
  # Search for a term with automatic semantic analysis
  context-creator search \"AuthenticationService\"
  
  # Search without semantic analysis (faster)
  context-creator search \"TODO\" --no-semantic
  
  # Search in specific directories
  context-creator search \"database\" src/ tests/

Semantic Analysis:
  # Trace import dependencies
  context-creator --trace-imports --include \"**/auth.py\"
  
  # Find function callers
  context-creator --include-callers --include \"**/payment.ts\"
  
  # Include type definitions
  context-creator --include-types --include \"**/models/**\"
  
  # Control traversal depth
  context-creator --semantic-depth 5 --include \"src/core/**\"

LLM Integration:
  # Ask questions about your codebase
  context-creator --prompt \"How does authentication work?\"
  
  # Targeted analysis
  context-creator --prompt \"Review security\" --include \"src/auth/**\"
  
  # Read prompt from stdin
  echo \"Find performance issues\" | context-creator --stdin

Remote Repositories:
  # Analyze GitHub repository
  context-creator --remote https://github.com/owner/repo
  
  # With specific patterns
  context-creator --remote https://github.com/facebook/react --include \"**/*.js\"

Advanced Options:
  # Copy to clipboard
  context-creator --include \"**/*.py\" --copy
  
  # Set token limit
  context-creator --max-tokens 100000
  
  # Verbose logging
  context-creator -vv --include \"src/**\"
";
81
/// Help message explaining custom priority rules
///
/// Passed to clap through `after_help` on `Config`. It describes the
/// first-match-wins ordering of `[[priorities]]` entries in
/// `.context-creator.toml` and points the reader at the `examples` subcommand.
const AFTER_HELP_MSG: &str = "\
CUSTOM PRIORITY RULES:
  Custom priority rules are processed in a 'first-match-wins' basis. Rules are 
  evaluated in the order they are defined in your .context-creator.toml configuration 
  file. The first rule that matches a given file will be used, and all subsequent 
  rules will be ignored for that file.

  Example configuration:
    [[priorities]]
    pattern = \"src/**/*.rs\"
    weight = 10.0
    
    [[priorities]]  
    pattern = \"tests/*\"
    weight = -2.0

For usage examples, run: context-creator examples
";
101
// Selected on the command line through `Config::llm_tool` (`-t` / `--tool`).
// `#[value(name = ...)]` fixes the spelling accepted for each variant, and
// `#[default]` makes `Gemini` the result of `LlmTool::default()`.
// The `///` comments on the variants are picked up by clap's derive macro as
// user-facing help text, so treat them as runtime strings.
/// Supported LLM CLI tools
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum LlmTool {
    /// Use gemini (default)
    #[value(name = "gemini")]
    #[default]
    Gemini,
    /// Use codex CLI
    #[value(name = "codex")]
    Codex,
}
113
// Selected on the command line through `Config::log_format` (`--log-format`).
// `#[value(name = ...)]` fixes the spelling accepted for each variant, and
// `#[default]` makes `Plain` the result of `LogFormat::default()`.
// The `///` comments on the variants are picked up by clap's derive macro as
// user-facing help text, so treat them as runtime strings.
/// Log output format options
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum LogFormat {
    /// Human-readable plain text format (default)
    #[value(name = "plain")]
    #[default]
    Plain,
    /// Machine-readable JSON format
    #[value(name = "json")]
    Json,
}
125
// Selected on the command line through `Config::output_format` (`--style`).
// `#[value(name = ...)]` fixes the spelling accepted for each variant, and
// `#[default]` makes `Markdown` the result of `OutputFormat::default()`.
// The `///` comments on the variants are picked up by clap's derive macro as
// user-facing help text, so treat them as runtime strings.
/// Output format options for the generated context
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
pub enum OutputFormat {
    /// Markdown format (default)
    #[value(name = "markdown")]
    #[default]
    Markdown,
    /// XML format with structured data
    #[value(name = "xml")]
    Xml,
    /// Plain text format
    #[value(name = "plain")]
    Plain,
    /// List of file paths only
    #[value(name = "paths")]
    Paths,
}
143
144impl LlmTool {
145    /// Get the command name for the tool
146    pub fn command(&self) -> &'static str {
147        match self {
148            LlmTool::Gemini => "gemini",
149            LlmTool::Codex => "codex",
150        }
151    }
152
153    /// Get the installation instructions for the tool
154    pub fn install_instructions(&self) -> &'static str {
155        match self {
156            LlmTool::Gemini => "Please install gemini with: pip install gemini",
157            LlmTool::Codex => {
158                "Please install codex CLI from: https://github.com/microsoft/codex-cli"
159            }
160        }
161    }
162
163    /// Get the default maximum tokens for the tool
164    pub fn default_max_tokens(&self) -> usize {
165        match self {
166            LlmTool::Gemini => 1_000_000,
167            LlmTool::Codex => 1_000_000,
168        }
169    }
170
171    /// Get the default maximum tokens for the tool with optional config override
172    pub fn default_max_tokens_with_config(
173        &self,
174        config_token_limits: Option<&crate::config::TokenLimits>,
175    ) -> usize {
176        if let Some(token_limits) = config_token_limits {
177            match self {
178                LlmTool::Gemini => token_limits.gemini.unwrap_or(1_000_000),
179                LlmTool::Codex => token_limits.codex.unwrap_or(1_000_000),
180            }
181        } else {
182            self.default_max_tokens()
183        }
184    }
185}
186
// Stored in `Config::command`. When a subcommand is present, `Config::validate`
// accepts the configuration without running any of its other checks.
// The `///` comments below are consumed by clap's derive macro as help text for
// the subcommands and their arguments, so treat them as user-facing strings.
/// Available commands for context-creator
#[derive(Subcommand, Debug, Clone)]
pub enum Commands {
    /// Search for files containing the specified term
    Search {
        /// Search pattern (case-insensitive)
        pattern: String,

        /// Disable automatic semantic analysis
        #[arg(long = "no-semantic")]
        no_semantic: bool,

        /// Search within specific paths
        // Positional; `None` when no paths follow the pattern.
        #[arg(value_name = "PATHS")]
        paths: Option<Vec<PathBuf>>,
    },

    /// Show usage examples
    Examples,
}
207
// NOTE: clap's derive macro turns the `///` comments on this struct and its fields
// into help text (an explicit `help = "..."` on a field takes precedence), so they
// are user-facing strings. Maintainer-only notes below use plain `//` comments.
/// High-performance CLI tool to convert codebases to Markdown for LLM context
#[derive(Parser, Debug, Clone)]
#[command(author, version, about, long_about = None, after_help = AFTER_HELP_MSG)]
pub struct Config {
    /// Subcommand to execute
    #[command(subcommand)]
    pub command: Option<Commands>,
    /// The prompt to send to the LLM for processing
    // Read it through `get_prompt()`, which treats a whitespace-only prompt as absent.
    #[arg(short = 'p', long = "prompt", help = "Process a text prompt directly")]
    pub prompt: Option<String>,

    /// One or more directory paths to process
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    #[arg(value_name = "PATHS", help = "Process files and directories")]
    pub paths: Option<Vec<PathBuf>>,

    /// Include files and directories matching glob patterns
    /// IMPORTANT: Use `get_directories()` to access the correct input paths.
    // NOTE(review): the "IMPORTANT" line above looks copied from `paths`; the
    // patterns themselves are read through `get_include_patterns()`.
    #[arg(
        long,
        help = "Include files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --include \"*.py\" --include \"src/**/*.{rs,toml}\""
    )]
    pub include: Option<Vec<String>>,

    /// Ignore files and directories matching glob patterns
    // Read through `get_ignore_patterns()`.
    #[arg(
        long,
        help = "Ignore files and directories matching the given glob pattern.\nPatterns use gitignore-style syntax. To prevent shell expansion,\nquote patterns: --ignore \"node_modules/**\" --ignore \"target/**\""
    )]
    pub ignore: Option<Vec<String>>,

    /// GitHub repository URL to analyze (e.g., <https://github.com/owner/repo>)
    // `validate()` requires an `https://github.com/` or `http://github.com/` prefix
    // and rejects combining this option with positional paths.
    #[arg(long, help = "Process a GitHub repository")]
    pub remote: Option<String>,

    /// Read prompt from stdin
    #[arg(long = "stdin", help = "Read prompt from standard input")]
    pub read_stdin: bool,

    /// The path to the output Markdown file. If used, won't call the LLM CLI
    // No explicit long name is given, so clap derives it from the field name
    // (presumably `--output-file`; the short form is `-o`).
    #[arg(short = 'o', long)]
    pub output_file: Option<PathBuf>,

    /// Maximum number of tokens for the generated codebase context
    // Highest-precedence input of `get_effective_max_tokens()`.
    #[arg(long)]
    pub max_tokens: Option<usize>,

    /// LLM CLI tool to use for processing
    #[arg(short = 't', long = "tool", default_value = "gemini")]
    pub llm_tool: LlmTool,

    /// Suppress all output except for errors and the final LLM response
    // `validate()` rejects combining this with a non-zero `verbose`.
    #[arg(short = 'q', long)]
    pub quiet: bool,

    /// Enable verbose logging (use -vv for trace level)
    // Counted flag: each `-v` occurrence increments the value.
    #[arg(short = 'v', long, action = clap::ArgAction::Count)]
    pub verbose: u8,

    /// Log output format
    #[arg(long = "log-format", value_enum, default_value = "plain")]
    pub log_format: LogFormat,

    /// Path to configuration file
    // When unset, `load_from_file()` falls back to `ConfigFile::load_default()`.
    #[arg(short = 'c', long)]
    pub config: Option<PathBuf>,

    /// Show progress indicators during processing
    #[arg(long)]
    pub progress: bool,

    /// Copy output to system clipboard instead of stdout
    // `validate()` rejects combining this with `output_file`.
    #[arg(short = 'C', long)]
    pub copy: bool,

    /// Enable enhanced context with file metadata
    #[arg(long = "enhanced-context")]
    pub enhanced_context: bool,

    /// Output format style
    #[arg(long = "style", value_enum, default_value = "markdown")]
    pub output_format: OutputFormat,

    /// Enable import tracing for included files
    #[arg(long, help = "Include files that import the specified modules")]
    pub trace_imports: bool,

    /// Include files that call functions from specified modules
    #[arg(long, help = "Include files containing callers of specified functions")]
    pub include_callers: bool,

    /// Include type definitions used by specified files
    #[arg(long, help = "Include type definitions and interfaces")]
    pub include_types: bool,

    /// Maximum depth for semantic dependency traversal
    // The default of 5 is repeated in `impl Default for Config`; keep both in sync.
    #[arg(
        long,
        default_value = "5",
        help = "Depth limit for dependency traversal"
    )]
    pub semantic_depth: usize,

    /// Custom priority rules loaded from config file (not a CLI argument)
    // Assigned by `load_from_file()` from the config file's `priorities`.
    #[clap(skip)]
    pub custom_priorities: Vec<crate::config::Priority>,

    /// Token limits loaded from config file (not a CLI argument)
    // Assigned by `load_from_file()` from the config file's `tokens`.
    #[clap(skip)]
    pub config_token_limits: Option<crate::config::TokenLimits>,

    /// Maximum tokens from config defaults (not a CLI argument)
    // Not assigned anywhere in this file; presumably set by
    // `ConfigFile::apply_to_cli_config` — TODO confirm.
    #[clap(skip)]
    pub config_defaults_max_tokens: Option<usize>,
}
323
324impl Default for Config {
325    fn default() -> Self {
326        Self {
327            command: None,
328            prompt: None,
329            paths: None,
330            include: None,
331            ignore: None,
332            remote: None,
333            read_stdin: false,
334            output_file: None,
335            max_tokens: None,
336            llm_tool: LlmTool::default(),
337            quiet: false,
338            verbose: 0,
339            log_format: LogFormat::default(),
340            config: None,
341            progress: false,
342            copy: false,
343            enhanced_context: false,
344            output_format: OutputFormat::default(),
345            trace_imports: false,
346            include_callers: false,
347            include_types: false,
348            semantic_depth: 5,
349            custom_priorities: vec![],
350            config_token_limits: None,
351            config_defaults_max_tokens: None,
352        }
353    }
354}
355
356impl Config {
357    /// Validate the configuration
358    pub fn validate(&self) -> Result<(), crate::utils::error::ContextCreatorError> {
359        use crate::utils::error::ContextCreatorError;
360
361        // If a command is provided, it's a valid input source on its own
362        if self.command.is_some() {
363            return Ok(());
364        }
365
366        // Validate that at least one input source is provided
367        let has_input_source = self.get_prompt().is_some()
368            || self.paths.is_some()
369            || self.include.is_some()
370            || self.remote.is_some()
371            || self.read_stdin;
372
373        if !has_input_source {
374            return Err(ContextCreatorError::InvalidConfiguration(
375                "At least one input source must be provided: --prompt, paths, --include, --remote, or --stdin".to_string(),
376            ));
377        }
378
379        // Validate verbose and quiet mutual exclusion
380        if self.verbose > 0 && self.quiet {
381            return Err(ContextCreatorError::InvalidConfiguration(
382                "Cannot use both --verbose (-v) and --quiet (-q) flags together".to_string(),
383            ));
384        }
385
386        // Note: Removed overly restrictive validation rules per issue #34
387        // Now allowing flexible combinations like:
388        // - --prompt with paths (--prompt "text" src/)
389        // - --prompt with --remote (--prompt "text" --remote url)
390        // - --stdin with paths (echo "prompt" | context-creator --stdin src/)
391        // - --include with --remote (--include "**/*.rs" --remote url)
392        // - --include with --stdin (--stdin --include "**/*.rs")
393        //
394        // The only remaining restrictions are for legitimate conflicts:
395        // - --prompt with --output-file (can't send to LLM and write to file)
396        // - --copy with --output-file (can't copy to clipboard and write to file)
397
398        // Validate repo URL if provided
399        if let Some(repo_url) = &self.remote {
400            if !repo_url.starts_with("https://github.com/")
401                && !repo_url.starts_with("http://github.com/")
402            {
403                return Err(ContextCreatorError::InvalidConfiguration(
404                    "Repository URL must be a GitHub URL (https://github.com/owner/repo)"
405                        .to_string(),
406                ));
407            }
408        } else {
409            // Only validate paths if repo is not provided
410            let paths = self.get_directories();
411            for path in &paths {
412                if !path.exists() {
413                    return Err(ContextCreatorError::InvalidPath(format!(
414                        "Path does not exist: {}",
415                        path.display()
416                    )));
417                }
418
419                // Allow both files and directories
420                if !path.is_dir() && !path.is_file() {
421                    return Err(ContextCreatorError::InvalidPath(format!(
422                        "Path is neither a file nor a directory: {}",
423                        path.display()
424                    )));
425                }
426            }
427        }
428
429        // Note: Pattern validation is handled by OverrideBuilder in walker.rs
430        // which provides better security and ReDoS protection
431
432        // Validate output file parent directory exists if specified
433        if let Some(output) = &self.output_file {
434            if let Some(parent) = output.parent() {
435                // Handle empty parent (current directory) and check if parent exists
436                if !parent.as_os_str().is_empty() && !parent.exists() {
437                    return Err(ContextCreatorError::InvalidPath(format!(
438                        "Output directory does not exist: {}",
439                        parent.display()
440                    )));
441                }
442            }
443        }
444
445        // Validate mutually exclusive options
446        if self.output_file.is_some() && self.get_prompt().is_some() {
447            return Err(ContextCreatorError::InvalidConfiguration(
448                "Cannot specify both --output and a prompt".to_string(),
449            ));
450        }
451
452        // Validate copy and output mutual exclusivity
453        if self.copy && self.output_file.is_some() {
454            return Err(ContextCreatorError::InvalidConfiguration(
455                "Cannot specify both --copy and --output".to_string(),
456            ));
457        }
458
459        // Validate repo and paths mutual exclusivity
460        // When --remote is specified, any positional paths are silently ignored in run()
461        // This prevents user confusion by failing early with a clear error message
462        if self.remote.is_some() && self.paths.is_some() {
463            return Err(ContextCreatorError::InvalidConfiguration(
464                "Cannot specify both --remote and local paths. Use --remote to analyze a remote repository, or provide local paths to analyze local directories.".to_string(),
465            ));
466        }
467
468        Ok(())
469    }
470
471    /// Load configuration from file if specified
472    pub fn load_from_file(&mut self) -> Result<(), crate::utils::error::ContextCreatorError> {
473        use crate::config::ConfigFile;
474
475        let config_file = if let Some(ref config_path) = self.config {
476            // Load from specified config file
477            Some(ConfigFile::load_from_file(config_path)?)
478        } else {
479            // Try to load from default locations
480            ConfigFile::load_default()?
481        };
482
483        if let Some(config_file) = config_file {
484            // Store custom priorities for the walker
485            self.custom_priorities = config_file.priorities.clone();
486
487            // Store token limits for token resolution
488            self.config_token_limits = Some(config_file.tokens.clone());
489
490            config_file.apply_to_cli_config(self);
491
492            if self.verbose > 0 {
493                if let Some(ref config_path) = self.config {
494                    debug!("Loaded configuration from: {}", config_path.display());
495                } else {
496                    debug!("Loaded configuration from default location");
497                }
498            }
499        }
500
501        Ok(())
502    }
503
504    /// Get the prompt from the explicit prompt flag
505    pub fn get_prompt(&self) -> Option<String> {
506        self.prompt
507            .as_ref()
508            .filter(|s| !s.trim().is_empty())
509            .cloned()
510    }
511
512    /// Get all directories from paths argument
513    /// When using --include patterns, this returns the default directory (current dir)
514    /// unless explicit paths are also provided (flexible combinations)
515    pub fn get_directories(&self) -> Vec<PathBuf> {
516        // If explicit paths are provided, use them
517        if let Some(paths) = &self.paths {
518            paths.clone()
519        } else if self.include.is_some() {
520            // When using include patterns without explicit paths, use current directory as base
521            vec![PathBuf::from(".")]
522        } else {
523            // Default to current directory
524            vec![PathBuf::from(".")]
525        }
526    }
527
528    /// Get include patterns if specified
529    pub fn get_include_patterns(&self) -> Vec<String> {
530        self.include.as_ref().cloned().unwrap_or_default()
531    }
532
533    /// Get ignore patterns if specified
534    pub fn get_ignore_patterns(&self) -> Vec<String> {
535        self.ignore.as_ref().cloned().unwrap_or_default()
536    }
537
538    /// Get effective max tokens with precedence: explicit CLI > token limits (if prompt) > config defaults > hard-coded defaults (if prompt) > None
539    pub fn get_effective_max_tokens(&self) -> Option<usize> {
540        // 1. Explicit CLI value always takes precedence
541        if let Some(explicit_tokens) = self.max_tokens {
542            return Some(explicit_tokens);
543        }
544
545        // 2. If using prompt, check token limits from config first
546        if let Some(_prompt) = self.get_prompt() {
547            // Check if we have config token limits for this tool
548            if let Some(token_limits) = &self.config_token_limits {
549                let config_limit = match self.llm_tool {
550                    LlmTool::Gemini => token_limits.gemini,
551                    LlmTool::Codex => token_limits.codex,
552                };
553
554                if let Some(limit) = config_limit {
555                    return Some(limit);
556                }
557            }
558
559            // 3. Fall back to config defaults if available
560            if let Some(defaults_tokens) = self.config_defaults_max_tokens {
561                return Some(defaults_tokens);
562            }
563
564            // 4. Fall back to hard-coded defaults for prompts
565            return Some(self.llm_tool.default_max_tokens());
566        }
567
568        // 5. For non-prompt usage, check config defaults
569        if let Some(defaults_tokens) = self.config_defaults_max_tokens {
570            return Some(defaults_tokens);
571        }
572
573        // 6. No automatic token limits for non-prompt usage
574        None
575    }
576
577    /// Get effective context tokens with prompt reservation
578    /// This accounts for prompt tokens when calculating available space for codebase context
579    pub fn get_effective_context_tokens(&self) -> Option<usize> {
580        if let Some(max_tokens) = self.get_effective_max_tokens() {
581            if let Some(prompt) = self.get_prompt() {
582                // Create token counter to measure prompt
583                if let Ok(counter) = crate::core::token::TokenCounter::new() {
584                    if let Ok(prompt_tokens) = counter.count_tokens(&prompt) {
585                        // Reserve space for prompt + safety buffer for response
586                        let safety_buffer = 1000; // Reserve for LLM response
587                        let reserved = prompt_tokens + safety_buffer;
588                        let available = max_tokens.saturating_sub(reserved);
589                        return Some(available);
590                    }
591                }
592                // Fallback: rough estimation if tiktoken fails
593                let estimated_prompt_tokens = prompt.len().div_ceil(4); // ~4 chars per token
594                let safety_buffer = 1000;
595                let reserved = estimated_prompt_tokens + safety_buffer;
596                let available = max_tokens.saturating_sub(reserved);
597                Some(available)
598            } else {
599                // No prompt, use full token budget
600                Some(max_tokens)
601            }
602        } else {
603            None
604        }
605    }
606
607    /// Check if we should read from stdin
608    pub fn should_read_stdin(&self) -> bool {
609        use std::io::IsTerminal;
610
611        // Explicitly requested stdin
612        if self.read_stdin {
613            return true;
614        }
615
616        // If stdin is not a terminal (i.e., it's piped) and no prompt is provided
617        if !std::io::stdin().is_terminal() && self.get_prompt().is_none() {
618            return true;
619        }
620
621        false
622    }
623}
624
625#[cfg(test)]
626mod tests {
627    use super::*;
628    use std::fs;
629    use tempfile::TempDir;
630
631    impl Config {
632        /// Helper function for creating Config instances in tests
633        #[allow(dead_code)]
634        fn new_for_test(paths: Option<Vec<PathBuf>>) -> Self {
635            Self {
636                paths,
637                quiet: true, // Good default for tests
638                ..Self::default()
639            }
640        }
641
642        /// Helper function for creating Config instances with include patterns in tests
643        #[allow(dead_code)]
644        fn new_for_test_with_include(include: Option<Vec<String>>) -> Self {
645            Self {
646                include,
647                quiet: true, // Good default for tests
648                ..Self::default()
649            }
650        }
651    }
652
653    #[test]
654    fn test_config_validation_valid_directory() {
655        let temp_dir = TempDir::new().unwrap();
656        let config = Config {
657            paths: Some(vec![temp_dir.path().to_path_buf()]),
658            ..Default::default()
659        };
660
661        assert!(config.validate().is_ok());
662    }
663
664    #[test]
665    fn test_config_validation_invalid_directory() {
666        let config = Config {
667            paths: Some(vec![PathBuf::from("/nonexistent/directory")]),
668            ..Default::default()
669        };
670
671        assert!(config.validate().is_err());
672    }
673
674    #[test]
675    fn test_config_validation_file_as_directory() {
676        let temp_dir = TempDir::new().unwrap();
677        let file_path = temp_dir.path().join("file.txt");
678        fs::write(&file_path, "test").unwrap();
679
680        let config = Config {
681            paths: Some(vec![file_path]),
682            ..Default::default()
683        };
684
685        // Files are now allowed as paths
686        assert!(config.validate().is_ok());
687    }
688
689    #[test]
690    fn test_config_validation_invalid_output_directory() {
691        let temp_dir = TempDir::new().unwrap();
692        let config = Config {
693            paths: Some(vec![temp_dir.path().to_path_buf()]),
694            output_file: Some(PathBuf::from("/nonexistent/directory/output.md")),
695            ..Default::default()
696        };
697
698        assert!(config.validate().is_err());
699    }
700
701    #[test]
702    fn test_config_validation_mutually_exclusive_options() {
703        let temp_dir = TempDir::new().unwrap();
704        let config = Config {
705            prompt: Some("test prompt".to_string()),
706            paths: Some(vec![temp_dir.path().to_path_buf()]),
707            output_file: Some(temp_dir.path().join("output.md")),
708            ..Default::default()
709        };
710
711        assert!(config.validate().is_err());
712    }
713
714    #[test]
715    fn test_llm_tool_enum_values() {
716        assert_eq!(LlmTool::Gemini.command(), "gemini");
717        assert_eq!(LlmTool::Codex.command(), "codex");
718
719        assert!(LlmTool::Gemini
720            .install_instructions()
721            .contains("pip install"));
722        assert!(LlmTool::Codex.install_instructions().contains("github.com"));
723
724        assert_eq!(LlmTool::default(), LlmTool::Gemini);
725    }
726
727    #[test]
728    fn test_llm_tool_default_max_tokens() {
729        assert_eq!(LlmTool::Gemini.default_max_tokens(), 1_000_000);
730        assert_eq!(LlmTool::Codex.default_max_tokens(), 1_000_000);
731    }
732
733    #[test]
734    fn test_config_get_effective_max_tokens_with_explicit() {
735        let config = Config {
736            prompt: Some("test prompt".to_string()),
737            max_tokens: Some(500_000),
738            llm_tool: LlmTool::Gemini,
739            ..Config::new_for_test(None)
740        };
741        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
742    }
743
744    #[test]
745    fn test_config_get_effective_max_tokens_with_prompt_default() {
746        let config = Config {
747            prompt: Some("test prompt".to_string()),
748            max_tokens: None,
749            llm_tool: LlmTool::Gemini,
750            ..Config::new_for_test(None)
751        };
752        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
753    }
754
755    #[test]
756    fn test_config_get_effective_max_tokens_no_prompt() {
757        let config = Config {
758            prompt: None,
759            max_tokens: None,
760            llm_tool: LlmTool::Gemini,
761            ..Config::new_for_test(None)
762        };
763        assert_eq!(config.get_effective_max_tokens(), None);
764    }
765
766    #[test]
767    fn test_config_get_effective_max_tokens_with_config_gemini() {
768        use crate::config::TokenLimits;
769
770        let config = Config {
771            prompt: Some("test prompt".to_string()),
772            max_tokens: None,
773            llm_tool: LlmTool::Gemini,
774            config_token_limits: Some(TokenLimits {
775                gemini: Some(2_500_000),
776                codex: Some(1_800_000),
777            }),
778            ..Config::new_for_test(None)
779        };
780        assert_eq!(config.get_effective_max_tokens(), Some(2_500_000));
781    }
782
783    #[test]
784    fn test_config_get_effective_max_tokens_with_config_codex() {
785        use crate::config::TokenLimits;
786
787        let config = Config {
788            prompt: Some("test prompt".to_string()),
789            max_tokens: None,
790            llm_tool: LlmTool::Codex,
791            config_token_limits: Some(TokenLimits {
792                gemini: Some(2_500_000),
793                codex: Some(1_800_000),
794            }),
795            ..Config::new_for_test(None)
796        };
797        assert_eq!(config.get_effective_max_tokens(), Some(1_800_000));
798    }
799
800    #[test]
801    fn test_config_get_effective_max_tokens_explicit_overrides_config() {
802        use crate::config::TokenLimits;
803
804        let config = Config {
805            prompt: Some("test prompt".to_string()),
806            max_tokens: Some(500_000), // Explicit value should override config
807            llm_tool: LlmTool::Gemini,
808            config_token_limits: Some(TokenLimits {
809                gemini: Some(2_500_000),
810                codex: Some(1_800_000),
811            }),
812            ..Config::new_for_test(None)
813        };
814        assert_eq!(config.get_effective_max_tokens(), Some(500_000));
815    }
816
817    #[test]
818    fn test_config_get_effective_max_tokens_config_partial_gemini() {
819        use crate::config::TokenLimits;
820
821        let config = Config {
822            prompt: Some("test prompt".to_string()),
823            max_tokens: None,
824            llm_tool: LlmTool::Gemini,
825            config_token_limits: Some(TokenLimits {
826                gemini: Some(3_000_000),
827                codex: None, // Codex not configured
828            }),
829            ..Config::new_for_test(None)
830        };
831        assert_eq!(config.get_effective_max_tokens(), Some(3_000_000));
832    }
833
834    #[test]
835    fn test_config_get_effective_max_tokens_config_partial_codex() {
836        use crate::config::TokenLimits;
837
838        let config = Config {
839            prompt: Some("test prompt".to_string()),
840            max_tokens: None,
841            llm_tool: LlmTool::Codex,
842            config_token_limits: Some(TokenLimits {
843                gemini: None, // Gemini not configured
844                codex: Some(1_200_000),
845            }),
846            ..Config::new_for_test(None)
847        };
848        assert_eq!(config.get_effective_max_tokens(), Some(1_200_000));
849    }
850
851    #[test]
852    fn test_config_get_effective_max_tokens_config_fallback_to_default() {
853        use crate::config::TokenLimits;
854
855        let config = Config {
856            prompt: Some("test prompt".to_string()),
857            max_tokens: None,
858            llm_tool: LlmTool::Gemini,
859            config_token_limits: Some(TokenLimits {
860                gemini: None, // No limit configured for Gemini
861                codex: Some(1_800_000),
862            }),
863            ..Config::new_for_test(None)
864        };
865        // Should fall back to hard-coded default
866        assert_eq!(config.get_effective_max_tokens(), Some(1_000_000));
867    }
868
869    #[test]
870    fn test_llm_tool_default_max_tokens_with_config() {
871        use crate::config::TokenLimits;
872
873        let token_limits = TokenLimits {
874            gemini: Some(2_500_000),
875            codex: Some(1_800_000),
876        };
877
878        assert_eq!(
879            LlmTool::Gemini.default_max_tokens_with_config(Some(&token_limits)),
880            2_500_000
881        );
882        assert_eq!(
883            LlmTool::Codex.default_max_tokens_with_config(Some(&token_limits)),
884            1_800_000
885        );
886    }
887
888    #[test]
889    fn test_llm_tool_default_max_tokens_with_config_partial() {
890        use crate::config::TokenLimits;
891
892        let token_limits = TokenLimits {
893            gemini: Some(3_000_000),
894            codex: None, // Codex not configured
895        };
896
897        assert_eq!(
898            LlmTool::Gemini.default_max_tokens_with_config(Some(&token_limits)),
899            3_000_000
900        );
901        // Should fall back to hard-coded default
902        assert_eq!(
903            LlmTool::Codex.default_max_tokens_with_config(Some(&token_limits)),
904            1_000_000
905        );
906    }
907
908    #[test]
909    fn test_llm_tool_default_max_tokens_with_no_config() {
910        assert_eq!(
911            LlmTool::Gemini.default_max_tokens_with_config(None),
912            1_000_000
913        );
914        assert_eq!(
915            LlmTool::Codex.default_max_tokens_with_config(None),
916            1_000_000
917        );
918    }
919
920    #[test]
921    fn test_get_effective_context_tokens_with_prompt() {
922        let config = Config {
923            prompt: Some("This is a test prompt".to_string()),
924            max_tokens: Some(10000),
925            llm_tool: LlmTool::Gemini,
926            ..Config::new_for_test(None)
927        };
928
929        let context_tokens = config.get_effective_context_tokens().unwrap();
930        // Should be less than max_tokens due to prompt + safety buffer reservation
931        assert!(context_tokens < 10000);
932        // Should be at least max_tokens - 1000 (safety buffer) - prompt tokens
933        assert!(context_tokens > 8000); // Conservative estimate
934    }
935
936    #[test]
937    fn test_get_effective_context_tokens_no_prompt() {
938        let config = Config {
939            prompt: None,
940            max_tokens: Some(10000),
941            llm_tool: LlmTool::Gemini,
942            ..Config::new_for_test(None)
943        };
944
945        // Without prompt, should use full token budget
946        assert_eq!(config.get_effective_context_tokens(), Some(10000));
947    }
948
949    #[test]
950    fn test_get_effective_context_tokens_no_limit() {
951        let config = Config {
952            prompt: None, // No prompt means no auto-limits
953            max_tokens: None,
954            llm_tool: LlmTool::Gemini,
955            ..Config::new_for_test(None)
956        };
957
958        // No max tokens configured and no prompt, should return None
959        assert_eq!(config.get_effective_context_tokens(), None);
960    }
961
962    #[test]
963    fn test_get_effective_context_tokens_with_config_limits() {
964        use crate::config::TokenLimits;
965
966        let config = Config {
967            prompt: Some("This is a longer test prompt for token counting".to_string()),
968            max_tokens: None, // Use config limits instead
969            llm_tool: LlmTool::Gemini,
970            config_token_limits: Some(TokenLimits {
971                gemini: Some(50000),
972                codex: Some(40000),
973            }),
974            ..Config::new_for_test(None)
975        };
976
977        let context_tokens = config.get_effective_context_tokens().unwrap();
978        // Should be less than config limit due to prompt reservation
979        assert!(context_tokens < 50000);
980        assert!(context_tokens > 45000); // Should be most of the budget
981    }
982
983    #[test]
984    fn test_config_validation_output_file_in_current_dir() {
985        let temp_dir = TempDir::new().unwrap();
986        let config = Config {
987            paths: Some(vec![temp_dir.path().to_path_buf()]),
988            output_file: Some(PathBuf::from("output.md")),
989            ..Default::default()
990        };
991
992        // Should not error for files in current directory
993        assert!(config.validate().is_ok());
994    }
995
996    #[test]
997    fn test_config_load_from_file_no_config() {
998        let temp_dir = TempDir::new().unwrap();
999        let mut config = Config {
1000            paths: Some(vec![temp_dir.path().to_path_buf()]),
1001            ..Default::default()
1002        };
1003
1004        // Should not error when no config file is found
1005        assert!(config.load_from_file().is_ok());
1006    }
1007
1008    #[test]
1009    fn test_parse_directories() {
1010        use clap::Parser;
1011
1012        // Test single directory
1013        let args = vec!["context-creator", "/path/one"];
1014        let config = Config::parse_from(args);
1015        assert_eq!(config.paths.as_ref().unwrap().len(), 1);
1016        assert_eq!(
1017            config.paths.as_ref().unwrap()[0],
1018            PathBuf::from("/path/one")
1019        );
1020    }
1021
1022    #[test]
1023    fn test_parse_multiple_directories() {
1024        use clap::Parser;
1025
1026        // Test multiple directories
1027        let args = vec!["context-creator", "/path/one", "/path/two", "/path/three"];
1028        let config = Config::parse_from(args);
1029        assert_eq!(config.paths.as_ref().unwrap().len(), 3);
1030        assert_eq!(
1031            config.paths.as_ref().unwrap()[0],
1032            PathBuf::from("/path/one")
1033        );
1034        assert_eq!(
1035            config.paths.as_ref().unwrap()[1],
1036            PathBuf::from("/path/two")
1037        );
1038        assert_eq!(
1039            config.paths.as_ref().unwrap()[2],
1040            PathBuf::from("/path/three")
1041        );
1042
1043        // Test with explicit prompt
1044        let args = vec!["context-creator", "--prompt", "Find duplicated patterns"];
1045        let config = Config::parse_from(args);
1046        assert_eq!(config.prompt, Some("Find duplicated patterns".to_string()));
1047    }
1048
1049    #[test]
1050    fn test_validate_multiple_directories() {
1051        let temp_dir = TempDir::new().unwrap();
1052        let dir1 = temp_dir.path().join("dir1");
1053        let dir2 = temp_dir.path().join("dir2");
1054        fs::create_dir(&dir1).unwrap();
1055        fs::create_dir(&dir2).unwrap();
1056
1057        // All directories exist - should succeed
1058        let config = Config {
1059            paths: Some(vec![dir1.clone(), dir2.clone()]),
1060            ..Default::default()
1061        };
1062        assert!(config.validate().is_ok());
1063
1064        // One directory doesn't exist - should fail
1065        let config = Config {
1066            paths: Some(vec![dir1, PathBuf::from("/nonexistent/dir")]),
1067            ..Default::default()
1068        };
1069        assert!(config.validate().is_err());
1070    }
1071
1072    #[test]
1073    fn test_validate_files_as_directories() {
1074        let temp_dir = TempDir::new().unwrap();
1075        let dir1 = temp_dir.path().join("dir1");
1076        let file1 = temp_dir.path().join("file.txt");
1077        fs::create_dir(&dir1).unwrap();
1078        fs::write(&file1, "test content").unwrap();
1079
1080        // Mix of directory and file - now allowed
1081        let config = Config {
1082            paths: Some(vec![dir1, file1]),
1083            ..Default::default()
1084        };
1085        assert!(config.validate().is_ok());
1086    }
1087}
context_creator/cli.rs

context_creator/
cli.rs