cli_testing_specialist/analyzer/
cli_parser.rs

1use crate::analyzer::SubcommandDetector;
2use crate::error::{CliTestError, Result};
3use crate::types::analysis::{CliAnalysis, CliOption, OptionType};
4use crate::utils::{execute_with_timeout, validate_binary_path, ResourceLimits};
5use lazy_static::lazy_static;
6use regex::Regex;
7use std::path::Path;
8use std::time::Instant;
9
10lazy_static! {
11    /// Regex pattern for short options: -h, -v, etc.
12    static ref SHORT_OPTION: Regex = Regex::new(r"-([a-zA-Z])(?:\s|,|$)").unwrap();
13
14    /// Regex pattern for long options: --help, --verbose, etc.
15    static ref LONG_OPTION: Regex = Regex::new(r"--([a-z][a-z0-9-]+)").unwrap();
16
17    /// Regex pattern for version strings: v1.0.0, 2.5.3, etc.
18    static ref VERSION_PATTERN: Regex = Regex::new(r"\b\d+\.\d+(?:\.\d+)?(?:-[a-z0-9.]+)?\b").unwrap();
19
20    /// Regex pattern for option with value: --name <value>, --file <path>
21    static ref OPTION_WITH_VALUE: Regex = Regex::new(r"--([a-z][a-z0-9-]+)\s+<([^>]+)>").unwrap();
22
23    /// Regex pattern for option description (tries to capture text after option)
24    static ref OPTION_DESCRIPTION: Regex = Regex::new(r"(?:--[a-z][a-z0-9-]+)(?:\s+<[^>]+>)?\s+(.+)").unwrap();
25}
26
27/// CLI Parser - Executes binaries and parses help output
28pub struct CliParser {
29    resource_limits: ResourceLimits,
30}
31
32impl CliParser {
33    /// Create a new CLI parser with default resource limits
34    pub fn new() -> Self {
35        Self {
36            resource_limits: ResourceLimits::default(),
37        }
38    }
39
40    /// Create a new CLI parser with custom resource limits
41    pub fn with_limits(resource_limits: ResourceLimits) -> Self {
42        Self { resource_limits }
43    }
44
45    /// Analyze a CLI binary and extract its structure
46    ///
47    /// This performs the following steps:
48    /// 1. Validate binary path
49    /// 2. Execute with --help to get help output
50    /// 3. Execute with --version to get version string
51    /// 4. Parse help output to extract options
52    /// 5. Detect subcommands recursively
53    /// 6. Build CliAnalysis structure
54    ///
55    /// # Examples
56    ///
57    /// ```no_run
58    /// use cli_testing_specialist::analyzer::CliParser;
59    /// use std::path::Path;
60    ///
61    /// let parser = CliParser::new();
62    /// let analysis = parser.analyze(Path::new("/usr/bin/curl"))?;
63    ///
64    /// println!("Binary: {}", analysis.binary_name);
65    /// println!("Version: {:?}", analysis.version);
66    /// println!("Options: {}", analysis.metadata.total_options);
67    /// println!("Subcommands: {}", analysis.subcommands.len());
68    /// # Ok::<(), cli_testing_specialist::error::CliTestError>(())
69    /// ```
70    ///
71    /// # With Custom Resource Limits
72    ///
73    /// ```no_run
74    /// use cli_testing_specialist::analyzer::CliParser;
75    /// use cli_testing_specialist::utils::ResourceLimits;
76    /// use std::path::Path;
77    /// use std::time::Duration;
78    ///
79    /// let limits = ResourceLimits::new(
80    ///     1024 * 1024 * 1024, // 1GB memory
81    ///     1024,               // file descriptors
82    ///     100,                // max processes
83    ///     Duration::from_secs(60), // timeout
84    /// );
85    ///
86    /// let parser = CliParser::with_limits(limits);
87    /// let analysis = parser.analyze(Path::new("/usr/bin/kubectl"))?;
88    /// # Ok::<(), cli_testing_specialist::error::CliTestError>(())
89    /// ```
90    pub fn analyze(&self, binary_path: &Path) -> Result<CliAnalysis> {
91        let start_time = Instant::now();
92
93        // Step 1: Validate binary
94        let canonical_path = validate_binary_path(binary_path)?;
95        log::info!("Analyzing binary: {}", canonical_path.display());
96
97        // Extract binary name
98        let binary_name = canonical_path
99            .file_name()
100            .and_then(|n| n.to_str())
101            .ok_or_else(|| CliTestError::BinaryNotFound(canonical_path.clone()))?
102            .to_string();
103
104        // Step 2: Execute with --help
105        let help_output = self.execute_help(&canonical_path)?;
106
107        if help_output.trim().is_empty() {
108            return Err(CliTestError::InvalidHelpOutput);
109        }
110
111        // Step 3: Try to get version
112        let version = self.try_get_version(&canonical_path);
113
114        // Step 4: Parse options from help output
115        let global_options = self.parse_options(&help_output);
116
117        // Step 5: Detect subcommands recursively
118        let subcommand_detector = SubcommandDetector::default();
119        let subcommands = subcommand_detector
120            .detect(&canonical_path, &help_output)
121            .unwrap_or_default();
122
123        // Step 6: Build analysis result
124        let mut analysis = CliAnalysis::new(canonical_path, binary_name, help_output);
125        analysis.version = version;
126        analysis.global_options = global_options;
127        analysis.subcommands = subcommands;
128
129        // Update metadata
130        let duration_ms = start_time.elapsed().as_millis() as u64;
131        analysis.update_metadata(duration_ms);
132
133        log::info!(
134            "Analysis complete: {} options, {} subcommands found in {}ms",
135            analysis.metadata.total_options,
136            analysis.subcommands.len(),
137            duration_ms
138        );
139
140        Ok(analysis)
141    }
142
143    /// Execute binary with --help flag
144    fn execute_help(&self, binary: &Path) -> Result<String> {
145        log::debug!("Executing {} --help", binary.display());
146
147        // Try --help first (most common)
148        match execute_with_timeout(binary, &["--help"], self.resource_limits.timeout()) {
149            Ok(output) => Ok(output),
150            Err(_) => {
151                // Try -h as fallback
152                log::debug!("--help failed, trying -h");
153                match execute_with_timeout(binary, &["-h"], self.resource_limits.timeout()) {
154                    Ok(output) => Ok(output),
155                    Err(_) => {
156                        // Try 'help' subcommand as last resort
157                        log::debug!("-h failed, trying 'help' subcommand");
158                        execute_with_timeout(binary, &["help"], self.resource_limits.timeout())
159                    }
160                }
161            }
162        }
163    }
164
165    /// Try to get version string from binary
166    fn try_get_version(&self, binary: &Path) -> Option<String> {
167        log::debug!("Attempting to get version for {}", binary.display());
168
169        // Try --version
170        if let Ok(output) =
171            execute_with_timeout(binary, &["--version"], self.resource_limits.timeout())
172        {
173            if let Some(version) = self.extract_version(&output) {
174                return Some(version);
175            }
176        }
177
178        // Try -v
179        if let Ok(output) = execute_with_timeout(binary, &["-v"], self.resource_limits.timeout()) {
180            if let Some(version) = self.extract_version(&output) {
181                return Some(version);
182            }
183        }
184
185        // Try 'version' subcommand
186        if let Ok(output) =
187            execute_with_timeout(binary, &["version"], self.resource_limits.timeout())
188        {
189            if let Some(version) = self.extract_version(&output) {
190                return Some(version);
191            }
192        }
193
194        None
195    }
196
197    /// Extract version string from output
198    fn extract_version(&self, output: &str) -> Option<String> {
199        VERSION_PATTERN.find(output).map(|m| m.as_str().to_string())
200    }
201
202    /// Parse CLI options from help output
203    pub fn parse_options(&self, help_output: &str) -> Vec<CliOption> {
204        let mut options = Vec::new();
205        let mut seen_options = std::collections::HashSet::new();
206
207        for line in help_output.lines() {
208            let trimmed = line.trim();
209
210            // Skip empty lines and headers
211            if trimmed.is_empty() || !trimmed.contains('-') {
212                continue;
213            }
214
215            // Extract short and long options from the line
216            let short = SHORT_OPTION
217                .captures(trimmed)
218                .and_then(|cap| cap.get(1))
219                .map(|m| format!("-{}", m.as_str()));
220
221            let long = LONG_OPTION
222                .captures(trimmed)
223                .and_then(|cap| cap.get(1))
224                .map(|m| format!("--{}", m.as_str()));
225
226            // Skip if no option found or already processed
227            if short.is_none() && long.is_none() {
228                continue;
229            }
230
231            let option_key = format!("{:?}:{:?}", short, long);
232            if seen_options.contains(&option_key) {
233                continue;
234            }
235            seen_options.insert(option_key);
236
237            // Extract description
238            let description = OPTION_DESCRIPTION
239                .captures(trimmed)
240                .and_then(|cap| cap.get(1))
241                .map(|m| m.as_str().trim().to_string());
242
243            // Determine option type (basic inference, will be enhanced by option_inferrer)
244            let option_type = if OPTION_WITH_VALUE.is_match(trimmed) {
245                OptionType::String
246            } else {
247                OptionType::Flag
248            };
249
250            options.push(CliOption {
251                short,
252                long,
253                description,
254                option_type,
255                required: false, // Default to optional
256                default_value: None,
257            });
258        }
259
260        options
261    }
262
263    /// Parse required positional arguments from help output
264    ///
265    /// Looks for Usage line and extracts `<ARG>` patterns:
266    /// - "Usage: cmd \[OPTIONS\] `<ID>`" → \["ID"\]
267    /// - "Usage: cmd `<FILE>` `<OUTPUT>`" → \["FILE", "OUTPUT"\]
268    pub fn parse_required_args(&self, help_output: &str) -> Vec<String> {
269        lazy_static! {
270            static ref USAGE_LINE: Regex = Regex::new(r"(?i)^\s*usage:\s+").unwrap();
271            static ref REQUIRED_ARG: Regex = Regex::new(r"<([^>]+)>").unwrap();
272        }
273
274        let mut required_args = Vec::new();
275
276        for line in help_output.lines() {
277            if USAGE_LINE.is_match(line) {
278                // Extract all <ARG> patterns from the usage line
279                for cap in REQUIRED_ARG.captures_iter(line) {
280                    if let Some(arg_match) = cap.get(1) {
281                        let arg_name = arg_match.as_str().to_string();
282                        required_args.push(arg_name);
283                    }
284                }
285                break; // Only process the first Usage line
286            }
287        }
288
289        log::debug!("Detected {} required arguments", required_args.len());
290        required_args
291    }
292}
293
294impl Default for CliParser {
295    fn default() -> Self {
296        Self::new()
297    }
298}
299
#[cfg(test)]
mod tests {
    use super::*;

    /// Short options match before end of text, whitespace, or a comma.
    #[test]
    fn test_short_option_regex() {
        for accepted in &["-h", "-v ", "-f,"] {
            assert!(SHORT_OPTION.is_match(accepted));
        }
        assert!(!SHORT_OPTION.is_match("--help"));
    }

    /// Long options need two dashes; a lone short flag must not match.
    #[test]
    fn test_long_option_regex() {
        for accepted in &["--help", "--verbose", "--max-size"] {
            assert!(LONG_OPTION.is_match(accepted));
        }
        assert!(!LONG_OPTION.is_match("-h"));
    }

    /// Plain, pre-release, and embedded version strings are all recognised.
    #[test]
    fn test_version_pattern_regex() {
        for accepted in &["1.0.0", "2.5.3", "1.0.0-alpha.1", "curl 7.64.1"] {
            assert!(VERSION_PATTERN.is_match(accepted));
        }
    }

    /// Only options followed by a `<placeholder>` count as value-taking.
    #[test]
    fn test_option_with_value_regex() {
        for accepted in &["--name <value>", "--file <path>"] {
            assert!(OPTION_WITH_VALUE.is_match(accepted));
        }
        assert!(!OPTION_WITH_VALUE.is_match("--verbose"));
    }

    /// The version is pulled out of surrounding text; no version gives `None`.
    #[test]
    fn test_extract_version() {
        let parser = CliParser::new();

        assert_eq!(
            parser.extract_version("curl 7.64.1").as_deref(),
            Some("7.64.1")
        );
        assert_eq!(
            parser.extract_version("version 1.0.0").as_deref(),
            Some("1.0.0")
        );
        assert_eq!(parser.extract_version("no version here"), None);
    }

    /// Three option lines in a typical help screen produce three options.
    #[test]
    fn test_parse_options_basic() {
        let parser = CliParser::new();
        let help_output = r#"
Usage: test [OPTIONS]

Options:
  -h, --help       Print help information
  -v, --verbose    Enable verbose output
      --name <VALUE>  Set name value
"#;

        let options = parser.parse_options(help_output);

        assert_eq!(options.len(), 3);

        // --help is present under both its long and short spelling
        assert!(options.iter().any(|o| o.long.as_deref() == Some("--help")));
        assert!(options.iter().any(|o| o.short.as_deref() == Some("-h")));

        // --verbose is present
        assert!(options
            .iter()
            .any(|o| o.long.as_deref() == Some("--verbose")));
    }

    /// A repeated short/long pair is reported once.
    #[test]
    fn test_parse_options_deduplication() {
        let parser = CliParser::new();
        let help_output = r#"
  -h, --help    Help text
  -h, --help    Duplicate help text
"#;

        let options = parser.parse_options(help_output);

        // The second line repeats the same pair and must be dropped
        assert_eq!(options.len(), 1);
    }

    /// End-to-end run against `/bin/ls`; skipped when that path is absent.
    #[cfg(unix)]
    #[test]
    fn test_analyze_ls() {
        let ls_path = Path::new("/bin/ls");
        if !ls_path.exists() {
            return; // Skip if ls not available
        }

        let analysis = CliParser::new()
            .analyze(ls_path)
            .expect("analyzing /bin/ls should succeed");

        assert_eq!(analysis.binary_name, "ls");
        assert!(!analysis.help_output.is_empty());
        assert!(!analysis.global_options.is_empty());
    }

    /// End-to-end run against `/usr/bin/curl`; skipped when that path is absent.
    #[cfg(unix)]
    #[test]
    fn test_analyze_curl() {
        let curl_path = Path::new("/usr/bin/curl");
        if !curl_path.exists() {
            return; // Skip if curl not available
        }

        let analysis = CliParser::new()
            .analyze(curl_path)
            .expect("analyzing /usr/bin/curl should succeed");

        assert_eq!(analysis.binary_name, "curl");
        assert!(analysis.version.is_some());
        assert!(!analysis.global_options.is_empty());

        // Curl should have many options
        assert!(analysis.global_options.len() > 10);
    }
}