cli_testing_specialist/analyzer/
cli_parser.rs

1use crate::analyzer::SubcommandDetector;
2use crate::error::{CliTestError, Result};
3use crate::types::analysis::{CliAnalysis, CliOption, OptionType};
4use crate::utils::{execute_with_timeout, validate_binary_path, ResourceLimits};
5use lazy_static::lazy_static;
6use regex::Regex;
7use std::path::Path;
8use std::time::Instant;
9
10lazy_static! {
11    /// Regex pattern for short options: -h, -v, etc.
12    static ref SHORT_OPTION: Regex = Regex::new(r"-([a-zA-Z])(?:\s|,|$)").unwrap();
13
14    /// Regex pattern for long options: --help, --verbose, etc.
15    static ref LONG_OPTION: Regex = Regex::new(r"--([a-z][a-z0-9-]+)").unwrap();
16
17    /// Regex pattern for version strings: v1.0.0, 2.5.3, etc.
18    static ref VERSION_PATTERN: Regex = Regex::new(r"\b\d+\.\d+(?:\.\d+)?(?:-[a-z0-9.]+)?\b").unwrap();
19
20    /// Regex pattern for option with value: --name <value>, --file <path>
21    static ref OPTION_WITH_VALUE: Regex = Regex::new(r"--([a-z][a-z0-9-]+)\s+<([^>]+)>").unwrap();
22
23    /// Regex pattern for option description (tries to capture text after option)
24    static ref OPTION_DESCRIPTION: Regex = Regex::new(r"(?:--[a-z][a-z0-9-]+)(?:\s+<[^>]+>)?\s+(.+)").unwrap();
25}
26
/// CLI Parser - Executes binaries and parses help output
///
/// The parser carries the [`ResourceLimits`] whose `timeout()` value is
/// handed to each `execute_with_timeout` call made from its methods.
pub struct CliParser {
    /// Limits used when running the target binary; the methods in this
    /// impl only read `timeout()` from it.
    resource_limits: ResourceLimits,
}
31
32impl CliParser {
33    /// Create a new CLI parser with default resource limits
34    pub fn new() -> Self {
35        Self {
36            resource_limits: ResourceLimits::default(),
37        }
38    }
39
40    /// Create a new CLI parser with custom resource limits
41    pub fn with_limits(resource_limits: ResourceLimits) -> Self {
42        Self { resource_limits }
43    }
44
45    /// Analyze a CLI binary and extract its structure
46    ///
47    /// This performs the following steps:
48    /// 1. Validate binary path
49    /// 2. Execute with --help to get help output
50    /// 3. Execute with --version to get version string
51    /// 4. Parse help output to extract options
52    /// 5. Detect subcommands recursively
53    /// 6. Build CliAnalysis structure
54    pub fn analyze(&self, binary_path: &Path) -> Result<CliAnalysis> {
55        let start_time = Instant::now();
56
57        // Step 1: Validate binary
58        let canonical_path = validate_binary_path(binary_path)?;
59        log::info!("Analyzing binary: {}", canonical_path.display());
60
61        // Extract binary name
62        let binary_name = canonical_path
63            .file_name()
64            .and_then(|n| n.to_str())
65            .ok_or_else(|| CliTestError::BinaryNotFound(canonical_path.clone()))?
66            .to_string();
67
68        // Step 2: Execute with --help
69        let help_output = self.execute_help(&canonical_path)?;
70
71        if help_output.trim().is_empty() {
72            return Err(CliTestError::InvalidHelpOutput);
73        }
74
75        // Step 3: Try to get version
76        let version = self.try_get_version(&canonical_path);
77
78        // Step 4: Parse options from help output
79        let global_options = self.parse_options(&help_output);
80
81        // Step 5: Detect subcommands recursively
82        let subcommand_detector = SubcommandDetector::default();
83        let subcommands = subcommand_detector
84            .detect(&canonical_path, &help_output)
85            .unwrap_or_default();
86
87        // Step 6: Build analysis result
88        let mut analysis = CliAnalysis::new(canonical_path, binary_name, help_output);
89        analysis.version = version;
90        analysis.global_options = global_options;
91        analysis.subcommands = subcommands;
92
93        // Update metadata
94        let duration_ms = start_time.elapsed().as_millis() as u64;
95        analysis.update_metadata(duration_ms);
96
97        log::info!(
98            "Analysis complete: {} options, {} subcommands found in {}ms",
99            analysis.metadata.total_options,
100            analysis.subcommands.len(),
101            duration_ms
102        );
103
104        Ok(analysis)
105    }
106
107    /// Execute binary with --help flag
108    fn execute_help(&self, binary: &Path) -> Result<String> {
109        log::debug!("Executing {} --help", binary.display());
110
111        // Try --help first (most common)
112        match execute_with_timeout(binary, &["--help"], self.resource_limits.timeout()) {
113            Ok(output) => Ok(output),
114            Err(_) => {
115                // Try -h as fallback
116                log::debug!("--help failed, trying -h");
117                match execute_with_timeout(binary, &["-h"], self.resource_limits.timeout()) {
118                    Ok(output) => Ok(output),
119                    Err(_) => {
120                        // Try 'help' subcommand as last resort
121                        log::debug!("-h failed, trying 'help' subcommand");
122                        execute_with_timeout(binary, &["help"], self.resource_limits.timeout())
123                    }
124                }
125            }
126        }
127    }
128
129    /// Try to get version string from binary
130    fn try_get_version(&self, binary: &Path) -> Option<String> {
131        log::debug!("Attempting to get version for {}", binary.display());
132
133        // Try --version
134        if let Ok(output) =
135            execute_with_timeout(binary, &["--version"], self.resource_limits.timeout())
136        {
137            if let Some(version) = self.extract_version(&output) {
138                return Some(version);
139            }
140        }
141
142        // Try -v
143        if let Ok(output) = execute_with_timeout(binary, &["-v"], self.resource_limits.timeout()) {
144            if let Some(version) = self.extract_version(&output) {
145                return Some(version);
146            }
147        }
148
149        // Try 'version' subcommand
150        if let Ok(output) =
151            execute_with_timeout(binary, &["version"], self.resource_limits.timeout())
152        {
153            if let Some(version) = self.extract_version(&output) {
154                return Some(version);
155            }
156        }
157
158        None
159    }
160
161    /// Extract version string from output
162    fn extract_version(&self, output: &str) -> Option<String> {
163        VERSION_PATTERN.find(output).map(|m| m.as_str().to_string())
164    }
165
166    /// Parse CLI options from help output
167    pub fn parse_options(&self, help_output: &str) -> Vec<CliOption> {
168        let mut options = Vec::new();
169        let mut seen_options = std::collections::HashSet::new();
170
171        for line in help_output.lines() {
172            let trimmed = line.trim();
173
174            // Skip empty lines and headers
175            if trimmed.is_empty() || !trimmed.contains('-') {
176                continue;
177            }
178
179            // Extract short and long options from the line
180            let short = SHORT_OPTION
181                .captures(trimmed)
182                .and_then(|cap| cap.get(1))
183                .map(|m| format!("-{}", m.as_str()));
184
185            let long = LONG_OPTION
186                .captures(trimmed)
187                .and_then(|cap| cap.get(1))
188                .map(|m| format!("--{}", m.as_str()));
189
190            // Skip if no option found or already processed
191            if short.is_none() && long.is_none() {
192                continue;
193            }
194
195            let option_key = format!("{:?}:{:?}", short, long);
196            if seen_options.contains(&option_key) {
197                continue;
198            }
199            seen_options.insert(option_key);
200
201            // Extract description
202            let description = OPTION_DESCRIPTION
203                .captures(trimmed)
204                .and_then(|cap| cap.get(1))
205                .map(|m| m.as_str().trim().to_string());
206
207            // Determine option type (basic inference, will be enhanced by option_inferrer)
208            let option_type = if OPTION_WITH_VALUE.is_match(trimmed) {
209                OptionType::String
210            } else {
211                OptionType::Flag
212            };
213
214            options.push(CliOption {
215                short,
216                long,
217                description,
218                option_type,
219                required: false, // Default to optional
220                default_value: None,
221            });
222        }
223
224        options
225    }
226
227    /// Parse required positional arguments from help output
228    ///
229    /// Looks for Usage line and extracts <ARG> patterns:
230    /// - "Usage: cmd [OPTIONS] <ID>" → ["ID"]
231    /// - "Usage: cmd <FILE> <OUTPUT>" → ["FILE", "OUTPUT"]
232    pub fn parse_required_args(&self, help_output: &str) -> Vec<String> {
233        lazy_static! {
234            static ref USAGE_LINE: Regex = Regex::new(r"(?i)^\s*usage:\s+").unwrap();
235            static ref REQUIRED_ARG: Regex = Regex::new(r"<([^>]+)>").unwrap();
236        }
237
238        let mut required_args = Vec::new();
239
240        for line in help_output.lines() {
241            if USAGE_LINE.is_match(line) {
242                // Extract all <ARG> patterns from the usage line
243                for cap in REQUIRED_ARG.captures_iter(line) {
244                    if let Some(arg_match) = cap.get(1) {
245                        let arg_name = arg_match.as_str().to_string();
246                        required_args.push(arg_name);
247                    }
248                }
249                break; // Only process the first Usage line
250            }
251        }
252
253        log::debug!("Detected {} required arguments", required_args.len());
254        required_args
255    }
256}
257
258impl Default for CliParser {
259    fn default() -> Self {
260        Self::new()
261    }
262}
263
#[cfg(test)]
mod tests {
    use super::*;

    /// Single-letter flags match at end of input, before whitespace and
    /// before a comma; a long option does not.
    #[test]
    fn test_short_option_regex() {
        for sample in &["-h", "-v ", "-f,"] {
            assert!(SHORT_OPTION.is_match(sample));
        }
        assert!(!SHORT_OPTION.is_match("--help"));
    }

    /// Long options (including hyphenated names) match; a short flag does not.
    #[test]
    fn test_long_option_regex() {
        for sample in &["--help", "--verbose", "--max-size"] {
            assert!(LONG_OPTION.is_match(sample));
        }
        assert!(!LONG_OPTION.is_match("-h"));
    }

    /// Plain, pre-release and embedded version numbers are all recognised.
    #[test]
    fn test_version_pattern_regex() {
        for sample in &["1.0.0", "2.5.3", "1.0.0-alpha.1", "curl 7.64.1"] {
            assert!(VERSION_PATTERN.is_match(sample));
        }
    }

    /// A long option needs a `<placeholder>` to count as value-taking.
    #[test]
    fn test_option_with_value_regex() {
        for sample in &["--name <value>", "--file <path>"] {
            assert!(OPTION_WITH_VALUE.is_match(sample));
        }
        assert!(!OPTION_WITH_VALUE.is_match("--verbose"));
    }

    /// `extract_version` returns the matched number, or `None` without one.
    #[test]
    fn test_extract_version() {
        let parser = CliParser::new();
        let cases = [
            ("curl 7.64.1", Some("7.64.1")),
            ("version 1.0.0", Some("1.0.0")),
            ("no version here", None),
        ];

        for (input, expected) in cases.iter() {
            assert_eq!(
                parser.extract_version(input),
                expected.map(str::to_string)
            );
        }
    }

    /// Three option lines yield three options with the expected names.
    #[test]
    fn test_parse_options_basic() {
        let help_output = r#"
Usage: test [OPTIONS]

Options:
  -h, --help       Print help information
  -v, --verbose    Enable verbose output
      --name <VALUE>  Set name value
"#;

        let options = CliParser::new().parse_options(help_output);
        assert_eq!(options.len(), 3);

        let has_long = |name: &str| options.iter().any(|o| o.long.as_deref() == Some(name));
        let has_short = |name: &str| options.iter().any(|o| o.short.as_deref() == Some(name));

        // --help / -h
        assert!(has_long("--help"));
        assert!(has_short("-h"));

        // --verbose
        assert!(has_long("--verbose"));
    }

    /// The same short/long pair on two lines is reported once.
    #[test]
    fn test_parse_options_deduplication() {
        let help_output = r#"
  -h, --help    Help text
  -h, --help    Duplicate help text
"#;

        let options = CliParser::new().parse_options(help_output);
        assert_eq!(options.len(), 1);
    }

    /// End-to-end run against `/bin/ls`; silently skipped when it is absent.
    #[cfg(unix)]
    #[test]
    fn test_analyze_ls() {
        let ls_path = Path::new("/bin/ls");
        if !ls_path.exists() {
            return;
        }

        let result = CliParser::new().analyze(ls_path);
        assert!(result.is_ok());

        let analysis = result.unwrap();
        assert_eq!(analysis.binary_name, "ls");
        assert!(!analysis.help_output.is_empty());
        assert!(!analysis.global_options.is_empty());
    }

    /// End-to-end run against `/usr/bin/curl`; silently skipped when it is absent.
    #[cfg(unix)]
    #[test]
    fn test_analyze_curl() {
        let curl_path = Path::new("/usr/bin/curl");
        if !curl_path.exists() {
            return;
        }

        let result = CliParser::new().analyze(curl_path);
        assert!(result.is_ok());

        let analysis = result.unwrap();
        assert_eq!(analysis.binary_name, "curl");
        assert!(analysis.version.is_some());
        assert!(!analysis.global_options.is_empty());

        // curl exposes a large option set
        assert!(analysis.global_options.len() > 10);
    }
}