code_digest/
cli.rs

1//! Command-line interface configuration and parsing
2
3use clap::{Parser, ValueEnum};
4use std::path::PathBuf;
5
/// Epilogue printed after the generated `--help` output.
///
/// Covers two topics: how custom priority rules from `.code-digest.toml` are
/// matched (first match wins), and a handful of example invocations.
// One literal per output line. Several lines end in spaces that are part of
// the emitted text; spelling each line out keeps that whitespace visible.
const AFTER_HELP_MSG: &str = concat!(
    "CUSTOM PRIORITY RULES:\n",
    "  Custom priority rules are processed in a 'first-match-wins' basis. Rules are \n",
    "  evaluated in the order they are defined in your .code-digest.toml configuration \n",
    "  file. The first rule that matches a given file will be used, and all subsequent \n",
    "  rules will be ignored for that file.\n",
    "\n",
    "  Example configuration:\n",
    "    [[priorities]]\n",
    "    pattern = \"src/**/*.rs\"\n",
    "    weight = 10.0\n",
    "    \n",
    "    [[priorities]]  \n",
    "    pattern = \"tests/*\"\n",
    "    weight = -2.0\n",
    "\n",
    "USAGE EXAMPLES:\n",
    "  # Process current directory with a prompt\n",
    "  code-digest --prompt \"Analyze this code\"\n",
    "  \n",
    "  # Process specific directories  \n",
    "  code-digest src/ tests/ docs/\n",
    "  \n",
    "  # Process a GitHub repository\n",
    "  code-digest --repo https://github.com/owner/repo\n",
    "  \n",
    "  # Read prompt from stdin\n",
    "  echo \"Review this code\" | code-digest --stdin .\n",
);
36
37/// Supported LLM CLI tools
38#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum, Default)]
39pub enum LlmTool {
40    /// Use gemini (default)
41    #[value(name = "gemini")]
42    #[default]
43    Gemini,
44    /// Use codex CLI
45    #[value(name = "codex")]
46    Codex,
47}
48
49impl LlmTool {
50    /// Get the command name for the tool
51    pub fn command(&self) -> &'static str {
52        match self {
53            LlmTool::Gemini => "gemini",
54            LlmTool::Codex => "codex",
55        }
56    }
57
58    /// Get the installation instructions for the tool
59    pub fn install_instructions(&self) -> &'static str {
60        match self {
61            LlmTool::Gemini => "Please install gemini with: pip install gemini",
62            LlmTool::Codex => {
63                "Please install codex CLI from: https://github.com/microsoft/codex-cli"
64            }
65        }
66    }
67}
68
69/// High-performance CLI tool to convert codebases to Markdown for LLM context
70#[derive(Parser, Debug, Clone)]
71#[command(author, version, about, long_about = None, after_help = AFTER_HELP_MSG)]
72#[command(group(
73    clap::ArgGroup::new("input_source")
74        .required(true)
75        .args(&["prompt", "paths", "repo", "read_stdin"]),
76))]
77pub struct Config {
78    /// The prompt to send to the LLM for processing
79    #[arg(short = 'p', long = "prompt", help = "Process a text prompt directly")]
80    pub prompt: Option<String>,
81
82    /// One or more directory or file paths to process
83    #[arg(value_name = "PATHS", help = "Process files and directories")]
84    pub paths: Option<Vec<PathBuf>>,
85
86    /// GitHub repository URL to analyze (e.g., <https://github.com/owner/repo>)
87    #[arg(long, help = "Process a GitHub repository")]
88    pub repo: Option<String>,
89
90    /// Read prompt from stdin
91    #[arg(long = "stdin", help = "Read prompt from standard input")]
92    pub read_stdin: bool,
93
94    /// The path to the output Markdown file. If used, won't call the LLM CLI
95    #[arg(short = 'o', long)]
96    pub output_file: Option<PathBuf>,
97
98    /// Maximum number of tokens for the generated codebase context
99    #[arg(long)]
100    pub max_tokens: Option<usize>,
101
102    /// LLM CLI tool to use for processing
103    #[arg(short = 't', long = "tool", default_value = "gemini")]
104    pub llm_tool: LlmTool,
105
106    /// Suppress all output except for errors and the final LLM response
107    #[arg(short = 'q', long)]
108    pub quiet: bool,
109
110    /// Enable verbose logging
111    #[arg(short = 'v', long)]
112    pub verbose: bool,
113
114    /// Path to configuration file
115    #[arg(short = 'c', long)]
116    pub config: Option<PathBuf>,
117
118    /// Show progress indicators during processing
119    #[arg(long)]
120    pub progress: bool,
121
122    /// Copy output to system clipboard instead of stdout
123    #[arg(short = 'C', long)]
124    pub copy: bool,
125
126    /// Enable enhanced context with file metadata
127    #[arg(long = "enhanced-context")]
128    pub enhanced_context: bool,
129
130    /// Custom priority rules loaded from config file (not a CLI argument)
131    #[clap(skip)]
132    pub custom_priorities: Vec<crate::config::Priority>,
133}
134
135impl Config {
136    /// Validate the configuration
137    pub fn validate(&self) -> Result<(), crate::utils::error::CodeDigestError> {
138        use crate::utils::error::CodeDigestError;
139
140        // Validate repo URL if provided
141        if let Some(repo_url) = &self.repo {
142            if !repo_url.starts_with("https://github.com/")
143                && !repo_url.starts_with("http://github.com/")
144            {
145                return Err(CodeDigestError::InvalidConfiguration(
146                    "Repository URL must be a GitHub URL (https://github.com/owner/repo)"
147                        .to_string(),
148                ));
149            }
150        } else {
151            // Only validate directories if repo is not provided
152            let directories = self.get_directories();
153            for directory in &directories {
154                if !directory.exists() {
155                    return Err(CodeDigestError::InvalidPath(format!(
156                        "Directory does not exist: {}",
157                        directory.display()
158                    )));
159                }
160
161                if !directory.is_dir() {
162                    return Err(CodeDigestError::InvalidPath(format!(
163                        "Path is not a directory: {}",
164                        directory.display()
165                    )));
166                }
167            }
168        }
169
170        // Validate output file parent directory exists if specified
171        if let Some(output) = &self.output_file {
172            if let Some(parent) = output.parent() {
173                // Handle empty parent (current directory) and check if parent exists
174                if !parent.as_os_str().is_empty() && !parent.exists() {
175                    return Err(CodeDigestError::InvalidPath(format!(
176                        "Output directory does not exist: {}",
177                        parent.display()
178                    )));
179                }
180            }
181        }
182
183        // Validate mutually exclusive options
184        if self.output_file.is_some() && self.get_prompt().is_some() {
185            return Err(CodeDigestError::InvalidConfiguration(
186                "Cannot specify both --output and a prompt".to_string(),
187            ));
188        }
189
190        // Validate copy and output mutual exclusivity
191        if self.copy && self.output_file.is_some() {
192            return Err(CodeDigestError::InvalidConfiguration(
193                "Cannot specify both --copy and --output".to_string(),
194            ));
195        }
196
197        Ok(())
198    }
199
200    /// Load configuration from file if specified
201    pub fn load_from_file(&mut self) -> Result<(), crate::utils::error::CodeDigestError> {
202        use crate::config::ConfigFile;
203
204        let config_file = if let Some(ref config_path) = self.config {
205            // Load from specified config file
206            Some(ConfigFile::load_from_file(config_path)?)
207        } else {
208            // Try to load from default locations
209            ConfigFile::load_default()?
210        };
211
212        if let Some(config_file) = config_file {
213            // Store custom priorities for the walker
214            self.custom_priorities = config_file.priorities.clone();
215
216            config_file.apply_to_cli_config(self);
217
218            if self.verbose {
219                if let Some(ref config_path) = self.config {
220                    eprintln!("📄 Loaded configuration from: {}", config_path.display());
221                } else {
222                    eprintln!("📄 Loaded configuration from default location");
223                }
224            }
225        }
226
227        Ok(())
228    }
229
230    /// Get the prompt from the explicit prompt flag
231    pub fn get_prompt(&self) -> Option<String> {
232        self.prompt.as_ref().filter(|s| !s.trim().is_empty()).cloned()
233    }
234
235    /// Get all directories from paths argument
236    pub fn get_directories(&self) -> Vec<PathBuf> {
237        self.paths.as_ref().cloned().unwrap_or_else(|| vec![PathBuf::from(".")])
238    }
239
240    /// Check if we should read from stdin
241    pub fn should_read_stdin(&self) -> bool {
242        use std::io::IsTerminal;
243
244        // Explicitly requested stdin
245        if self.read_stdin {
246            return true;
247        }
248
249        // If stdin is not a terminal (i.e., it's piped) and no prompt is provided
250        if !std::io::stdin().is_terminal() && self.get_prompt().is_none() {
251            return true;
252        }
253
254        false
255    }
256}
257
#[cfg(test)]
mod tests {
    use super::*;
    use clap::Parser;
    use std::fs;
    use tempfile::TempDir;

    /// Build a `Config` with the given positional paths and every other
    /// option left unset, so each test only spells out what it changes.
    fn config_for(paths: Vec<PathBuf>) -> Config {
        Config {
            prompt: None,
            paths: Some(paths),
            repo: None,
            read_stdin: false,
            output_file: None,
            max_tokens: None,
            llm_tool: LlmTool::default(),
            quiet: false,
            verbose: false,
            config: None,
            progress: false,
            copy: false,
            enhanced_context: false,
            custom_priorities: vec![],
        }
    }

    #[test]
    fn test_config_validation_valid_directory() {
        let workspace = TempDir::new().unwrap();
        let config = config_for(vec![workspace.path().to_path_buf()]);

        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_config_validation_invalid_directory() {
        let config = config_for(vec![PathBuf::from("/nonexistent/directory")]);

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_config_validation_file_as_directory() {
        let workspace = TempDir::new().unwrap();
        let plain_file = workspace.path().join("file.txt");
        fs::write(&plain_file, "test").unwrap();

        let config = config_for(vec![plain_file]);

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_config_validation_invalid_output_directory() {
        let workspace = TempDir::new().unwrap();
        let config = Config {
            output_file: Some(PathBuf::from("/nonexistent/directory/output.md")),
            ..config_for(vec![workspace.path().to_path_buf()])
        };

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_config_validation_mutually_exclusive_options() {
        let workspace = TempDir::new().unwrap();
        let config = Config {
            prompt: Some("test prompt".to_string()),
            output_file: Some(workspace.path().join("output.md")),
            ..config_for(vec![workspace.path().to_path_buf()])
        };

        assert!(config.validate().is_err());
    }

    #[test]
    fn test_llm_tool_enum_values() {
        assert_eq!(LlmTool::Gemini.command(), "gemini");
        assert_eq!(LlmTool::Codex.command(), "codex");

        assert!(LlmTool::Gemini.install_instructions().contains("pip install"));
        assert!(LlmTool::Codex.install_instructions().contains("github.com"));

        assert_eq!(LlmTool::default(), LlmTool::Gemini);
    }

    #[test]
    fn test_config_validation_output_file_in_current_dir() {
        let workspace = TempDir::new().unwrap();
        let config = Config {
            output_file: Some(PathBuf::from("output.md")),
            ..config_for(vec![workspace.path().to_path_buf()])
        };

        // A bare file name targets the current directory and must be accepted.
        assert!(config.validate().is_ok());
    }

    #[test]
    fn test_config_load_from_file_no_config() {
        let workspace = TempDir::new().unwrap();
        let mut config = config_for(vec![workspace.path().to_path_buf()]);

        // Finding no configuration file is not an error.
        assert!(config.load_from_file().is_ok());
    }

    #[test]
    fn test_parse_directories() {
        // A single positional path.
        let config = Config::parse_from(["code-digest", "/path/one"]);

        assert_eq!(config.paths, Some(vec![PathBuf::from("/path/one")]));
    }

    #[test]
    fn test_parse_multiple_directories() {
        // Several positional paths keep their order.
        let config = Config::parse_from(["code-digest", "/path/one", "/path/two", "/path/three"]);
        let expected: Vec<PathBuf> = ["/path/one", "/path/two", "/path/three"]
            .iter()
            .map(PathBuf::from)
            .collect();
        assert_eq!(config.paths, Some(expected));

        // An explicit prompt on its own.
        let config = Config::parse_from(["code-digest", "--prompt", "Find duplicated patterns"]);
        assert_eq!(config.prompt, Some("Find duplicated patterns".to_string()));
    }

    #[test]
    fn test_validate_multiple_directories() {
        let workspace = TempDir::new().unwrap();
        let first = workspace.path().join("dir1");
        let second = workspace.path().join("dir2");
        fs::create_dir(&first).unwrap();
        fs::create_dir(&second).unwrap();

        // Every directory exists: accepted.
        let all_present = config_for(vec![first.clone(), second.clone()]);
        assert!(all_present.validate().is_ok());

        // One directory is missing: rejected.
        let one_missing = config_for(vec![first, PathBuf::from("/nonexistent/dir")]);
        assert!(one_missing.validate().is_err());
    }

    #[test]
    fn test_validate_files_as_directories() {
        let workspace = TempDir::new().unwrap();
        let directory = workspace.path().join("dir1");
        let plain_file = workspace.path().join("file.txt");
        fs::create_dir(&directory).unwrap();
        fs::write(&plain_file, "test content").unwrap();

        // A directory mixed with a regular file: rejected.
        let config = config_for(vec![directory, plain_file]);
        assert!(config.validate().is_err());
    }
}
543}