use crate::model::{
ArgumentSpec, ArgumentType, EnvVarSpec, OptionSpec, OptionType, SubcommandSpec, ValidationRule,
ValidationType,
};
/// Reports whether any argument is a recognised request for help.
///
/// Each argument is lowercased before it is compared, so `--HELP` counts.
/// The accepted spellings are `-h`, `--help`, `help`, `/?`, `-?` and
/// `--usage`; an argument must equal one of them exactly.
pub fn detect_help_flag(args: &[String]) -> bool {
    const HELP_SPELLINGS: [&str; 6] = ["-h", "--help", "help", "/?", "-?", "--usage"];

    for candidate in args {
        let folded = candidate.to_lowercase();
        if HELP_SPELLINGS.contains(&folded.as_str()) {
            return true;
        }
    }
    false
}
/// Collects a block of surrounding text for every usage line in the output.
///
/// `stdout` and `stderr` are decoded lossily as UTF-8 and their lines are
/// concatenated (stdout first). A line is a usage line when, lowercased, it
/// contains `usage:` or starts with `usage `. For each such line the result
/// holds one string made of up to one line before it, the line itself and up
/// to ten lines after it, joined with `\n`. Blocks of nearby usage lines may
/// overlap. Returns an empty vector when no usage line exists.
pub fn parse_usages(stdout: &[u8], stderr: &[u8]) -> Vec<String> {
    const LINES_BEFORE: usize = 1;
    const LINES_AFTER: usize = 10;

    let out_text = String::from_utf8_lossy(stdout);
    let err_text = String::from_utf8_lossy(stderr);
    let all_lines: Vec<&str> = out_text.lines().chain(err_text.lines()).collect();

    all_lines
        .iter()
        .enumerate()
        .filter(|(_, text)| {
            let folded = text.to_lowercase();
            folded.contains("usage:") || folded.starts_with("usage ")
        })
        .map(|(hit, _)| {
            let first = hit.saturating_sub(LINES_BEFORE);
            let last = (hit + 1 + LINES_AFTER).min(all_lines.len());
            all_lines[first..last].join("\n")
        })
        .collect()
}
/// Builds option specifications from the option-looking lines of usage blocks.
///
/// Every line that starts with `-` once leading whitespace is removed is
/// handed to `split_flag_and_description`. The flag part is tokenised on
/// whitespace and commas (with ` | ` treated as a comma); each token is cut at
/// the first `<`, `>`, `[`, `]` or `=`, and then filed as a long flag (`--`
/// prefix) or a short flag (`-` prefix). Lines that yield no flag are skipped.
///
/// An option is marked `required` when its description contains the word
/// "required" (case-insensitive). Argument metadata and the default value come
/// from `extract_option_metadata` and `extract_default_value`.
pub fn parse_options_from_usage_blocks(blocks: &[String]) -> Vec<OptionSpec> {
    let mut collected = Vec::new();
    let option_lines = blocks
        .iter()
        .flat_map(|block| block.lines())
        .map(str::trim_start)
        .filter(|text| text.starts_with('-'));

    for text in option_lines {
        let (flag_part, desc_part) = split_flag_and_description(text);

        let mut short_flags = Vec::new();
        let mut long_flags = Vec::new();
        let unified = flag_part.replace(" | ", ", ");
        for piece in unified.split(|c: char| c.is_whitespace() || c == ',') {
            let piece = piece.trim();
            // Keep only the flag itself: drop any attached value placeholder.
            let bare = piece
                .split(|c: char| matches!(c, '<' | '>' | '[' | ']' | '='))
                .next()
                .unwrap_or(piece)
                .trim();
            if bare.starts_with("--") {
                long_flags.push(bare.to_string());
            } else if bare.starts_with('-') {
                short_flags.push(bare.to_string());
            }
        }
        if short_flags.is_empty() && long_flags.is_empty() {
            continue;
        }

        let description = desc_part
            .map(|text| text.trim().to_string())
            .filter(|text| !text.is_empty());
        let (takes_argument, argument_name, option_type, choices) =
            extract_option_metadata(&flag_part, description.as_deref());
        let required = description
            .as_deref()
            .map_or(false, |text| text.to_lowercase().contains("required"));
        let default_value = extract_default_value(description.as_deref());

        collected.push(OptionSpec {
            short_flags,
            long_flags,
            description,
            option_type,
            required,
            default_value,
            takes_argument,
            argument_name,
            choices,
        });
    }
    collected
}
/// Builds option specifications from `Options` sections of the help output.
///
/// The lines of `full_stdout` followed by those of `full_stderr` are scanned.
/// A section opens at a line that is exactly `options` or `options:`
/// (case-insensitive, surrounding whitespace ignored) and closes at:
///
/// * a blank line more than two lines past the section start,
/// * an unindented line ending in `:` or in `commands`, or
/// * any other unindented line that does not start with `-`.
///
/// Indentation is judged on the raw line. The earlier code tested the line
/// after its leading whitespace had been trimmed, which made every line look
/// unindented: a wrapped description line closed the section and every option
/// after it was lost. Indented non-flag lines are now skipped instead.
///
/// Inside a section every line starting with `-` (after trimming) is parsed
/// the same way as in usage blocks: flags are tokenised on whitespace and
/// commas, cut at the first `<`, `>`, `[`, `]` or `=`, and split into long
/// (`--`) and short (`-`) flags.
pub fn parse_options_from_sections(full_stdout: &str, full_stderr: &str) -> Vec<OptionSpec> {
    let mut options = Vec::new();
    let mut in_options_section = false;
    let mut options_start_idx = 0;

    for (idx, line) in full_stdout.lines().chain(full_stderr.lines()).enumerate() {
        let heading = line.trim().to_lowercase();
        if heading == "options:" || heading == "options" {
            in_options_section = true;
            options_start_idx = idx + 1;
            continue;
        }
        if !in_options_section {
            continue;
        }

        let trimmed_line = line.trim_start();
        if trimmed_line.is_empty() {
            // Tolerate blank lines right under the header; later ones end the section.
            if idx > options_start_idx + 2 {
                in_options_section = false;
            }
            continue;
        }

        // Must look at the raw line: `trimmed_line` never starts with whitespace.
        let is_indented = line.starts_with(' ') || line.starts_with('\t');
        let lower = trimmed_line.to_lowercase();
        // `commands` also covers lines ending in `subcommands`.
        if !is_indented && (lower.ends_with(':') || lower.ends_with("commands")) {
            in_options_section = false;
            continue;
        }
        if !trimmed_line.starts_with('-') {
            // Indented text is a continuation of the previous description.
            if !is_indented {
                in_options_section = false;
            }
            continue;
        }

        let (flag_part, desc_part) = split_flag_and_description(trimmed_line);
        let mut short_flags = Vec::new();
        let mut long_flags = Vec::new();
        let flag_part_normalized = flag_part.replace(" | ", ", ");
        for token in flag_part_normalized.split(|c: char| c.is_whitespace() || c == ',') {
            let token = token.trim();
            let cleaned = token
                .split(|c: char| matches!(c, '<' | '>' | '[' | ']' | '='))
                .next()
                .unwrap_or(token)
                .trim();
            if cleaned.starts_with("--") {
                long_flags.push(cleaned.to_string());
            } else if cleaned.starts_with('-') {
                short_flags.push(cleaned.to_string());
            }
        }
        if short_flags.is_empty() && long_flags.is_empty() {
            continue;
        }

        let description = desc_part
            .map(|s| s.trim().to_string())
            .filter(|s| !s.is_empty());
        let (takes_argument, argument_name, option_type, choices) =
            extract_option_metadata(&flag_part, description.as_deref());
        let required = description
            .as_deref()
            .map_or(false, |d| d.to_lowercase().contains("required"));
        let default_value = extract_default_value(description.as_deref());

        options.push(OptionSpec {
            short_flags,
            long_flags,
            description,
            option_type,
            required,
            default_value,
            takes_argument,
            argument_name,
            choices,
        });
    }
    options
}
/// Collects positional arguments from usage blocks and `Arguments` sections.
///
/// Arguments found by `parse_arguments_from_usage_line` in every usage block
/// are combined with those found by `parse_arguments_from_section` in the
/// lines of `full_stdout` followed by `full_stderr`. The result is sorted by
/// name and then placeholder, and entries with the same name and placeholder
/// are merged into one.
///
/// Sorting on both keys matters: `dedup_by` only removes *adjacent*
/// duplicates, and the earlier name-only sort could leave duplicates
/// interleaved (e.g. `<A>`, `[A]`, `<A>`, `[A]` from two overlapping usage
/// blocks). When duplicates are merged, a description carried by the discarded
/// entry is kept if the surviving entry has none.
pub fn parse_arguments(
    full_stdout: &str,
    full_stderr: &str,
    usage_blocks: &[String],
) -> Vec<ArgumentSpec> {
    let mut arguments: Vec<ArgumentSpec> = usage_blocks
        .iter()
        .flat_map(|block| parse_arguments_from_usage_line(block))
        .collect();

    let lines: Vec<String> = full_stdout
        .lines()
        .chain(full_stderr.lines())
        .map(str::to_string)
        .collect();
    arguments.extend(parse_arguments_from_section(&lines));

    // Stable sort: among equal keys the usage-line entries stay first.
    arguments.sort_by(|a, b| {
        a.name
            .cmp(&b.name)
            .then_with(|| a.placeholder.cmp(&b.placeholder))
    });
    // `dedup_by` hands over (element to drop, element to keep).
    arguments.dedup_by(|later, kept| {
        let same = later.name == kept.name && later.placeholder == kept.placeholder;
        if same && kept.description.is_none() {
            kept.description = later.description.take();
        }
        same
    });
    arguments
}
/// Extracts `<name>` and `[name]` placeholders from the usage lines of a block.
///
/// Only lines that, lowercased, contain `usage:` or start with `usage ` are
/// inspected. Angle-bracket placeholders are reported as required, square
/// bracket ones as optional; a trailing `...` marks the argument as variadic
/// and is appended to the stored placeholder text. Descriptions are not
/// available here and are left as `None`.
///
/// The placeholder regex is compiled once per call rather than once per line.
fn parse_arguments_from_usage_line(usage_block: &str) -> Vec<ArgumentSpec> {
    let placeholder_re = regex::Regex::new(r"(<([^>]+)>|\[([^\]]+)\])(\.\.\.)?")
        .expect("placeholder pattern is a valid regex");

    let mut arguments = Vec::new();
    for line in usage_block.lines() {
        let line_lower = line.to_lowercase();
        if !line_lower.contains("usage:") && !line_lower.starts_with("usage ") {
            continue;
        }
        for cap in placeholder_re.captures_iter(line) {
            let bracketed = match cap.get(1) {
                Some(m) => m.as_str(),
                None => continue,
            };
            // Group 2 is the `<...>` content, group 3 the `[...]` content.
            let name = match cap.get(2).or_else(|| cap.get(3)) {
                Some(m) => m.as_str().to_string(),
                None => continue,
            };
            let is_variadic = cap.get(4).is_some();

            let mut shown = bracketed.to_string();
            if is_variadic {
                shown.push_str("...");
            }
            let placeholder = Some(shown);
            let arg_type = infer_argument_type(&name, placeholder.as_deref());

            arguments.push(ArgumentSpec {
                name,
                description: None,
                required: bracketed.starts_with('<'),
                variadic: is_variadic,
                arg_type,
                placeholder,
            });
        }
    }
    arguments
}
/// Extracts positional arguments listed under an `Arguments` header.
///
/// A section opens at a line that is exactly `arguments` or `arguments:`
/// (case-insensitive, surrounding whitespace ignored). Inside it, lines that
/// start with `<` or `[` after trimming are parsed by `parse_argument_line`.
/// The section closes at:
///
/// * a blank line more than two lines past the section start,
/// * an unindented line that ends in `:` or mentions `options` / `commands`, or
/// * any other unindented line that is not an argument line.
///
/// Indentation is judged on the raw line. The earlier code tested the
/// already-trimmed text, so the "unindented" condition was always true: an
/// indented argument whose description mentioned "options", or any wrapped
/// description line, closed the section and dropped the remaining arguments.
fn parse_arguments_from_section(lines: &[String]) -> Vec<ArgumentSpec> {
    let mut arguments = Vec::new();
    let mut in_arguments_section = false;
    let mut section_start_idx = 0;

    for (idx, line) in lines.iter().enumerate() {
        let heading = line.trim().to_lowercase();
        if heading == "arguments:" || heading == "arguments" {
            in_arguments_section = true;
            section_start_idx = idx + 1;
            continue;
        }
        if !in_arguments_section {
            continue;
        }

        let trimmed_line = line.trim_start();
        if trimmed_line.is_empty() {
            if idx > section_start_idx + 2 {
                in_arguments_section = false;
            }
            continue;
        }

        // Must look at the raw line: `trimmed_line` never starts with whitespace.
        let is_indented = line.starts_with(' ') || line.starts_with('\t');
        let lower = trimmed_line.to_lowercase();
        let looks_like_header =
            lower.ends_with(':') || lower.contains("options") || lower.contains("commands");
        if !is_indented && looks_like_header {
            in_arguments_section = false;
            continue;
        }

        if trimmed_line.starts_with('<') || trimmed_line.starts_with('[') {
            arguments.extend(parse_argument_line(trimmed_line));
        } else if !is_indented {
            in_arguments_section = false;
        }
    }
    arguments
}
/// Parses one line of an `Arguments` section into an argument specification.
///
/// The trimmed line must begin with a `<name>` or `[name]` placeholder,
/// optionally followed by `...`; otherwise `None` is returned. Angle brackets
/// mean required, a trailing `...` means variadic (and is appended to the
/// stored placeholder).
///
/// The text after the placeholder becomes the description. It is cut at the
/// first column gap (two consecutive spaces) or, failing that, at the first
/// tab, so a trailing extra column is dropped. The earlier code cut at the
/// first single space, which reduced every description to its first word.
fn parse_argument_line(line: &str) -> Option<ArgumentSpec> {
    let trimmed = line.trim();
    let re = regex::Regex::new(r"^(<([^>]+)>|\[([^\]]+)\])(\.\.\.)?")
        .expect("placeholder pattern is a valid regex");
    let cap = re.captures(trimmed)?;

    let bracketed = cap.get(1)?.as_str();
    // Group 2 is the `<...>` content, group 3 the `[...]` content.
    let name = cap.get(2).or_else(|| cap.get(3))?.as_str().to_string();
    let is_variadic = cap.get(4).is_some();

    let mut shown = bracketed.to_string();
    if is_variadic {
        shown.push_str("...");
    }
    let placeholder = Some(shown);

    let rest = trimmed[cap.get(0)?.end()..].trim();
    let rest_bytes = rest.as_bytes();
    // Both separators are ASCII, so the byte offset is a valid char boundary.
    let cut = rest_bytes
        .windows(2)
        .position(|pair| pair[0] == b' ' && pair[1] == b' ')
        .or_else(|| rest_bytes.iter().position(|&b| b == b'\t'));
    let desc = match cut {
        Some(idx) => rest[..idx].trim(),
        None => rest,
    };
    let description = if desc.is_empty() {
        None
    } else {
        Some(desc.to_string())
    };

    let arg_type = infer_argument_type(&name, placeholder.as_deref());
    Some(ArgumentSpec {
        name,
        description,
        required: bracketed.starts_with('<'),
        variadic: is_variadic,
        arg_type,
        placeholder,
    })
}
/// Guesses an argument's type from keywords in its name and placeholder.
///
/// The uppercased name is checked first, in this order: `FILE`/`PATH`/`DIR`
/// give `Path`, `URL` gives `Url`, `EMAIL` gives `Email`, and
/// `PORT`/`NUM`/`COUNT` give `Number`. If the name is inconclusive, the
/// lowercased placeholder is checked in the same order (without `count`).
/// When nothing matches the type is `String`; the result is always `Some`.
fn infer_argument_type(name: &str, placeholder: Option<&str>) -> Option<ArgumentType> {
    fn mentions(text: &str, words: &[&str]) -> bool {
        words.iter().any(|word| text.contains(word))
    }

    let shouted = name.to_uppercase();
    let hint = placeholder.map(str::to_lowercase);
    let name_has = |words: &[&str]| mentions(&shouted, words);
    let hint_has = |words: &[&str]| hint.as_deref().map_or(false, |text| mentions(text, words));

    let inferred = if name_has(&["FILE", "PATH", "DIR"]) {
        ArgumentType::Path
    } else if name_has(&["URL"]) {
        ArgumentType::Url
    } else if name_has(&["EMAIL"]) {
        ArgumentType::Email
    } else if name_has(&["PORT", "NUM", "COUNT"]) {
        ArgumentType::Number
    } else if hint_has(&["file", "path", "dir"]) {
        ArgumentType::Path
    } else if hint_has(&["url"]) {
        ArgumentType::Url
    } else if hint_has(&["email"]) {
        ArgumentType::Email
    } else if hint_has(&["port", "num"]) {
        ArgumentType::Number
    } else {
        ArgumentType::String
    };
    Some(inferred)
}
/// Derives argument metadata for an option from its flag text and description.
///
/// Returns `(takes_argument, argument_name, option_type, choices)`:
///
/// * If the flag text, or else the description, contains a `{a|b|c}` group,
///   the option is a `Choice` taking an argument, with the `|`-separated
///   entries as choices and no argument name.
/// * Otherwise the flag text is searched for `<NAME>`, `[NAME]` or `=NAME`;
///   the last match names the argument.
/// * Otherwise, if the description mentions "takes", "requires" or "specify"
///   and contains a `<NAME>` or `[NAME]` placeholder, that names the argument.
///
/// An option that takes an argument is typed by `infer_option_type`; any other
/// option is `Boolean`. Outside the choice case the returned list is empty.
/// The former `choices` local was never filled, so the branch testing it was
/// unreachable and has been removed.
fn extract_option_metadata(
    flag_part: &str,
    description: Option<&str>,
) -> (bool, Option<String>, OptionType, Vec<String>) {
    let choice_pattern =
        regex::Regex::new(r"\{([^}]+)\}").expect("choice pattern is a valid regex");
    let choices_in = |text: &str| -> Option<Vec<String>> {
        let listed = choice_pattern.captures(text)?.get(1)?.as_str();
        let parsed: Vec<String> = listed
            .split('|')
            .map(|choice| choice.trim().to_string())
            .filter(|choice| !choice.is_empty())
            .collect();
        if parsed.is_empty() {
            None
        } else {
            Some(parsed)
        }
    };

    // The flag text wins over the description when both carry a choice group.
    let found_choices = choices_in(flag_part).or_else(|| description.and_then(|d| choices_in(d)));
    if let Some(choices) = found_choices {
        return (true, None, OptionType::Choice, choices);
    }

    let arg_pattern = regex::Regex::new(r"(<([^>]+)>|\[([^\]]+)\]|=\s*([^,\s]+))")
        .expect("argument pattern is a valid regex");
    let mut takes_argument = false;
    let mut argument_name = None;
    for cap in arg_pattern.captures_iter(flag_part) {
        takes_argument = true;
        // Groups 2, 3 and 4 hold the `<...>`, `[...]` and `=...` names respectively.
        let named = cap.get(2).or_else(|| cap.get(3)).or_else(|| cap.get(4));
        if let Some(m) = named {
            argument_name = Some(m.as_str().to_string());
        }
    }

    if !takes_argument {
        if let Some(desc) = description {
            let desc_lower = desc.to_lowercase();
            let hints_at_value = desc_lower.contains("takes")
                || desc_lower.contains("requires")
                || desc_lower.contains("specify");
            if hints_at_value {
                let desc_arg_pattern = regex::Regex::new(r"<([^>]+)>|\[([^\]]+)\]")
                    .expect("description placeholder pattern is a valid regex");
                if let Some(cap) = desc_arg_pattern.captures(desc) {
                    takes_argument = true;
                    if let Some(m) = cap.get(1).or_else(|| cap.get(2)) {
                        argument_name = Some(m.as_str().to_string());
                    }
                }
            }
        }
    }

    let option_type = if takes_argument {
        infer_option_type(argument_name.as_deref(), description)
    } else {
        OptionType::Boolean
    };
    (takes_argument, argument_name, option_type, Vec::new())
}
/// Guesses the value type of an option that takes an argument.
///
/// The uppercased argument name is consulted first: `FILE`/`PATH`/`DIR` give
/// `Path`, then `PORT`/`NUM`/`COUNT`/`SIZE` give `Number`. If that is
/// inconclusive the lowercased description is consulted: `file`/`path`/
/// `directory` give `Path`, then `port`/`number`/`count`/`numeric` give
/// `Number`. Anything else is `String`.
fn infer_option_type(argument_name: Option<&str>, description: Option<&str>) -> OptionType {
    fn mentions(text: &str, words: &[&str]) -> bool {
        words.iter().any(|word| text.contains(word))
    }

    let shouted = argument_name.map(str::to_uppercase);
    let prose = description.map(str::to_lowercase);
    let name_has = |words: &[&str]| shouted.as_deref().map_or(false, |n| mentions(n, words));
    let prose_has = |words: &[&str]| prose.as_deref().map_or(false, |d| mentions(d, words));

    if name_has(&["FILE", "PATH", "DIR"]) {
        OptionType::Path
    } else if name_has(&["PORT", "NUM", "COUNT", "SIZE"]) {
        OptionType::Number
    } else if prose_has(&["file", "path", "directory"]) {
        OptionType::Path
    } else if prose_has(&["port", "number", "count", "numeric"]) {
        OptionType::Number
    } else {
        OptionType::String
    }
}
/// Extracts a default value mentioned in an option description.
///
/// Patterns are tried in order and the first match wins:
///
/// 1. `(default: VALUE)` — everything up to the closing parenthesis,
/// 2. `[default: VALUE]` — everything up to the closing bracket,
/// 3. `default: VALUE` — a single token,
/// 4. `default to VALUE` / `defaults to VALUE` — a single token.
///
/// The delimited forms are tried first on purpose. The generic
/// `default:` pattern also matches inside them, and when it ran first it
/// shadowed them: `[default: 10]` produced `10]` and `(default: a b)`
/// produced `a`. Matching is case-sensitive and the captured text is trimmed.
/// Returns `None` when there is no description or no pattern matches.
fn extract_default_value(description: Option<&str>) -> Option<String> {
    const PATTERNS: [&str; 4] = [
        r"\(default:\s*([^)]+)\)",
        r"\[default:\s*([^\]]+)\]",
        r"default:\s*([^\s,;)]+)",
        r"defaults?\s+to\s+([^\s,;)]+)",
    ];

    let desc = description?;
    PATTERNS.iter().find_map(|pattern| {
        let re = regex::Regex::new(pattern).expect("default-value patterns are valid regexes");
        re.captures(desc)
            .and_then(|cap| cap.get(1))
            .map(|m| m.as_str().trim().to_string())
    })
}
/// Splits an option line into its flag part and its description.
///
/// Help output separates the two columns with a gap, so the line is split at
/// the first run of two consecutive spaces or, when there is none, at the
/// first tab. Both halves are trimmed towards the gap. If no separator exists,
/// or either half would be empty, the whole line is returned as the flag part
/// with no description.
///
/// Splitting at a single space is deliberately avoided: it would break
/// `-h, --help  Show help` after `-h,` and push `--help` (or a value
/// placeholder such as `<FILE>`) into the description.
fn split_flag_and_description(line: &str) -> (String, Option<String>) {
    let bytes = line.as_bytes();
    // Both separators are ASCII, so the byte offset is a valid char boundary.
    let gap = bytes
        .windows(2)
        .position(|pair| pair[0] == b' ' && pair[1] == b' ')
        .or_else(|| bytes.iter().position(|&b| b == b'\t'));

    if let Some(idx) = gap {
        let (left, right) = line.split_at(idx);
        let flags = left.trim_end();
        let desc = right.trim_start();
        if !flags.is_empty() && !desc.is_empty() {
            return (flags.to_string(), Some(desc.to_string()));
        }
    }
    (line.to_string(), None)
}
/// Collects subcommands from command sections and from free-form lists.
///
/// The lines of `full_stdout` followed by `full_stderr` are scanned for
/// headers: lines that (trimmed, lowercased) end in `commands` or `commands:`
/// and do not contain `option`. Free-form lists are collected separately by
/// `find_subcommands_in_lists`; when no header exists that list alone is
/// returned.
///
/// Under each header, entries are read until a blank line or a line without
/// any surrounding whitespace. The first word of an entry (minus trailing
/// commas) is the name and the rest is the description. Entries whose name
/// starts with `-` or equals `...` are skipped. If the following line is more
/// deeply indented and does not start with `-`, it is appended to the
/// description and consumed.
///
/// Subcommands from the free-form lists are appended afterwards unless one
/// with the same name is already present.
pub fn parse_subcommands(full_stdout: &str, full_stderr: &str) -> Vec<SubcommandSpec> {
    let lines: Vec<String> = full_stdout
        .lines()
        .chain(full_stderr.lines())
        .map(str::to_string)
        .collect();
    if lines.is_empty() {
        return Vec::new();
    }

    let header_indices: Vec<usize> = lines
        .iter()
        .enumerate()
        .filter(|(_, text)| {
            let folded = text.trim().to_lowercase();
            (folded.ends_with("commands:") || folded.ends_with("commands"))
                && !folded.contains("option")
        })
        .map(|(position, _)| position)
        .collect();

    let list_subcommands = find_subcommands_in_lists(&lines);
    if header_indices.is_empty() {
        return list_subcommands;
    }

    let mut subcommands = Vec::new();
    for header_idx in header_indices {
        let mut cursor = header_idx + 1;
        while let Some(raw) = lines.get(cursor) {
            let entry = raw.trim();
            // A blank line or a line with no surrounding whitespace ends the section.
            if entry.is_empty() || raw.as_str() == entry {
                break;
            }
            // Step past this line now; a continuation line advances once more below.
            cursor += 1;

            let (first_word, remainder) =
                entry.split_once(char::is_whitespace).unwrap_or((entry, ""));
            let name = first_word.trim().trim_end_matches(',').trim();
            if name.is_empty() || name.starts_with('-') || name == "..." {
                continue;
            }

            let remainder = remainder.trim();
            let mut description = if remainder.is_empty() {
                None
            } else {
                Some(remainder.to_string())
            };

            let current_indent = raw.len() - entry.len();
            if let Some(next_raw) = lines.get(cursor) {
                let next_trimmed = next_raw.trim_start();
                let next_indent = next_raw.len() - next_trimmed.len();
                let is_continuation = !next_trimmed.is_empty()
                    && next_raw.as_str() != next_trimmed
                    && !next_trimmed.starts_with('-')
                    && next_indent > current_indent;
                if is_continuation {
                    let extra = next_trimmed.to_string();
                    description = Some(match description {
                        Some(existing) => format!("{existing} {extra}"),
                        None => extra,
                    });
                    cursor += 1;
                }
            }

            subcommands.push(SubcommandSpec {
                name: name.to_string(),
                description,
                full_path: name.to_string(),
                parent: None,
                options: Vec::new(),
                arguments: Vec::new(),
                subcommands: Vec::new(),
            });
        }
    }

    for list_sub in list_subcommands {
        let already_known = subcommands.iter().any(|known| known.name == list_sub.name);
        if !already_known {
            subcommands.push(list_sub);
        }
    }
    subcommands
}
/// Collects subcommands from free-form lists such as "Available commands".
///
/// A list is introduced by any line that (trimmed, lowercased) contains
/// `commands` together with `are`, `available` or `common`. Up to 49 lines
/// after it are examined:
///
/// * blank lines are skipped, but one more than three lines past the
///   introduction ends the list;
/// * an unindented line ends the list;
/// * indented lines starting with `-` are ignored;
/// * for any other indented line the first word (minus trailing commas) is the
///   name, accepted only if it consists of alphanumerics, `-` and `_`, does
///   not start with `-` or `[`, and is not `...`. The rest of the line is the
///   description.
///
/// Only unindented lines terminate the list. The earlier code also stopped at
/// any line ending in `:` or mentioning "options", because its "unindented"
/// test ran on already-trimmed text and was always true; an entry such as
/// `config   Manage options` therefore cut the list short.
fn find_subcommands_in_lists(lines: &[String]) -> Vec<SubcommandSpec> {
    let mut subcommands = Vec::new();

    for (idx, line) in lines.iter().enumerate() {
        let lower = line.trim().to_lowercase();
        let introduces_list = lower.contains("commands")
            && (lower.contains("are") || lower.contains("available") || lower.contains("common"));
        if !introduces_list {
            continue;
        }

        // Look at lines idx+1 ..= idx+49, as far as they exist.
        for (i, raw) in lines.iter().enumerate().skip(idx + 1).take(49) {
            let trimmed = raw.trim_start();
            if trimmed.is_empty() {
                if i > idx + 3 {
                    break;
                }
                continue;
            }
            if raw.as_str() == trimmed {
                // No leading whitespace: the list is over.
                break;
            }
            if trimmed.starts_with('-') {
                continue;
            }

            let (first_word, remainder) = trimmed
                .split_once(char::is_whitespace)
                .unwrap_or((trimmed, ""));
            let name = first_word.trim().trim_end_matches(',').trim();
            let is_plain_word = !name.is_empty()
                && name != "..."
                && !name.starts_with('-')
                && !name.starts_with('[')
                && name
                    .chars()
                    .all(|c| c.is_alphanumeric() || c == '-' || c == '_');
            if !is_plain_word {
                continue;
            }

            let remainder = remainder.trim();
            let description = if remainder.is_empty() {
                None
            } else {
                Some(remainder.to_string())
            };
            subcommands.push(SubcommandSpec {
                name: name.to_string(),
                description,
                full_path: name.to_string(),
                parent: None,
                options: Vec::new(),
                arguments: Vec::new(),
                subcommands: Vec::new(),
            });
        }
    }
    subcommands
}
/// Collects example invocations from `Examples` sections of the help output.
///
/// `stdout` and `stderr` are joined with a newline and scanned line by line.
/// A section opens at a line that (trimmed, lowercased) starts with
/// `examples:` or `example:`. Inside a section a line counts as a command when
/// it starts with `$`, `>` or `#`, or when it does not start with `-` or `[`
/// and either contains a space or is longer than ten bytes. Prompt characters
/// are stripped from the stored command.
///
/// The description is the previous line if it is longer than ten bytes and is
/// not a prompt line; otherwise the next line under the same conditions
/// (additionally not starting with `-`). Tags `basic`, `advanced` and `common`
/// are derived from keywords in the description; with none, the tag is
/// `example`.
///
/// The section closes at:
///
/// * an unindented line ending in `:` or mentioning `options`, `commands` or
///   `arguments`;
/// * an unindented non-command line more than two lines past the start;
/// * a blank line more than three lines past the start, if an unindented line
///   ending in `:` follows within the next four lines.
///
/// Indentation is judged on the raw line. The earlier code tested text whose
/// leading whitespace had already been trimmed, so the "unindented" condition
/// was always true and an indented example such as `$ tool --list-commands`
/// closed the section.
pub fn parse_examples(stdout: &str, stderr: &str) -> Vec<crate::model::Example> {
    fn is_indented(text: &str) -> bool {
        text.starts_with(' ') || text.starts_with('\t')
    }
    fn is_prompt(text: &str) -> bool {
        text.starts_with('$') || text.starts_with('>') || text.starts_with('#')
    }

    let mut examples = Vec::new();
    let combined = format!("{}\n{}", stdout, stderr);
    let lines: Vec<&str> = combined.lines().collect();
    let mut in_examples_section = false;
    let mut section_start_idx = 0;

    for (idx, line) in lines.iter().copied().enumerate() {
        let heading = line.trim().to_lowercase();
        if heading.starts_with("examples:") || heading.starts_with("example:") {
            in_examples_section = true;
            section_start_idx = idx + 1;
            continue;
        }
        if !in_examples_section {
            continue;
        }

        let trimmed_line = line.trim_start();
        if trimmed_line.is_empty() {
            if idx > section_start_idx + 3 {
                // Peek at the next four lines for an unindented `Header:`.
                let header_follows = lines
                    .iter()
                    .skip(idx + 1)
                    .take(4)
                    .any(|next| next.trim_end().ends_with(':') && !is_indented(next));
                if header_follows {
                    in_examples_section = false;
                }
            }
            continue;
        }

        let unindented = !is_indented(line);
        let lower = trimmed_line.to_lowercase();
        let looks_like_header = lower.ends_with(':')
            || lower.contains("options")
            || lower.contains("commands")
            || lower.contains("arguments");
        if unindented && looks_like_header {
            in_examples_section = false;
            continue;
        }

        let is_command_line = is_prompt(trimmed_line)
            || (!trimmed_line.starts_with('-')
                && !trimmed_line.starts_with('[')
                && (trimmed_line.contains(' ') || trimmed_line.len() > 10));
        if !is_command_line {
            if unindented && idx > section_start_idx + 2 {
                in_examples_section = false;
            }
            continue;
        }

        let command = trimmed_line
            .trim_start_matches('$')
            .trim_start_matches('>')
            .trim_start_matches('#')
            .trim()
            .to_string();

        let describes = |text: &str| !text.is_empty() && !is_prompt(text) && text.len() > 10;
        let from_previous = idx
            .checked_sub(1)
            .map(|prev| lines[prev].trim())
            .filter(|text| describes(text));
        let from_next = || {
            lines
                .get(idx + 1)
                .map(|next| next.trim())
                .filter(|text| describes(text) && !text.starts_with('-'))
        };
        let description = from_previous.or_else(from_next).map(str::to_string);

        let mut tags = Vec::new();
        if let Some(desc) = &description {
            let desc_lower = desc.to_lowercase();
            if desc_lower.contains("basic") || desc_lower.contains("simple") {
                tags.push("basic".to_string());
            }
            if desc_lower.contains("advanced") || desc_lower.contains("complex") {
                tags.push("advanced".to_string());
            }
            if desc_lower.contains("common") || desc_lower.contains("typical") {
                tags.push("common".to_string());
            }
        }
        if tags.is_empty() {
            tags.push("example".to_string());
        }

        examples.push(crate::model::Example {
            command,
            description,
            tags,
        });
    }
    examples
}
/// Finds environment variables mentioned in the help output and in option
/// descriptions.
///
/// `stdout` and `stderr` are joined with a newline. Only lines that
/// (lowercased) contain `environment`, `env`, `$` or `variable` are examined.
/// On those lines a variable name is taken from `$NAME`, `${NAME}`,
/// `NAME variable` / `NAME environment variable`, or `set|use|via NAME`.
/// Names shorter than two characters and the words `THE`, `CAN`, `SET` are
/// ignored; each name is reported once. A variable is mapped to an option when
/// its name equals an option flag with dashes stripped/converted to
/// underscores and uppercased. The line becomes the description when it is
/// more than ten bytes longer than the name.
///
/// Option descriptions that mention `environment` or `env var` are scanned for
/// uppercase identifiers of at least three characters (other than the option's
/// own name); those are reported mapped to the option's first long flag.
///
/// All regexes are compiled once up front instead of once per line and
/// pattern, and the option-description pass runs once instead of after every
/// qualifying line. Results and their order are unchanged.
pub fn parse_environment_variables(
    stdout: &str,
    stderr: &str,
    options: &[OptionSpec],
) -> Vec<EnvVarSpec> {
    let compile = |pattern: &str| {
        regex::Regex::new(pattern).expect("environment-variable patterns are valid regexes")
    };
    let line_patterns = [
        // `$NAME`
        compile(r"\$([A-Z_][A-Z0-9_]*)"),
        // `${NAME}`
        compile(r"\$\{([A-Z_][A-Z0-9_]*)\}"),
        // `NAME variable` / `NAME environment variable`
        compile(r"\b([A-Z_][A-Z0-9_]*)\s+(?:environment\s+)?variable"),
        // `set NAME`, `use NAME`, `via NAME`
        compile(r"(?:set|use|via)\s+([A-Z_][A-Z0-9_]*)"),
    ];
    let description_pattern = compile(r"\b([A-Z_][A-Z0-9_]{2,})\b");

    let mut option_map = std::collections::HashMap::new();
    for opt in options {
        for long_flag in &opt.long_flags {
            let opt_name = long_flag
                .trim_start_matches("--")
                .replace('-', "_")
                .to_uppercase();
            option_map.insert(opt_name, long_flag.clone());
        }
        for short_flag in &opt.short_flags {
            let opt_name = short_flag.trim_start_matches("-").to_uppercase();
            option_map.insert(opt_name, short_flag.clone());
        }
    }

    let mut env_vars = Vec::new();
    let mut found_vars = std::collections::HashSet::new();
    let mut options_scanned = false;
    let combined = format!("{}\n{}", stdout, stderr);

    for line in combined.lines() {
        let line_lower = line.to_lowercase();
        let relevant = ["environment", "env", "$", "variable"]
            .iter()
            .any(|keyword| line_lower.contains(keyword));
        if !relevant {
            continue;
        }

        for re in &line_patterns {
            for cap in re.captures_iter(line) {
                let var_name = match cap.get(1) {
                    Some(m) => m.as_str().to_uppercase(),
                    None => continue,
                };
                if var_name.len() < 2 || matches!(var_name.as_str(), "THE" | "CAN" | "SET") {
                    continue;
                }
                // `insert` returns false when the name was already reported.
                if !found_vars.insert(var_name.clone()) {
                    continue;
                }
                let option_mapped = option_map.get(&var_name).cloned();
                let description = if line.len() > var_name.len() + 10 {
                    Some(line.trim().to_string())
                } else {
                    None
                };
                let default_value = extract_env_default_value(line);
                env_vars.push(EnvVarSpec {
                    name: var_name,
                    description,
                    option_mapped,
                    default_value,
                });
            }
        }

        // NOTE(review): the option-description pass has always run right after
        // the first qualifying line (and never when no line qualifies). It is
        // kept at this point so the order of results does not change; repeating
        // it for later lines could not add anything, so it runs only once.
        if options_scanned {
            continue;
        }
        options_scanned = true;

        for opt in options {
            let desc = match &opt.description {
                Some(desc) => desc,
                None => continue,
            };
            let desc_lower = desc.to_lowercase();
            if !desc_lower.contains("environment") && !desc_lower.contains("env var") {
                continue;
            }
            // The option's own name in NAME_FORM; empty when it has no long flag.
            let own_name = opt
                .long_flags
                .first()
                .map(|flag| {
                    flag.trim_start_matches("--")
                        .replace('-', "_")
                        .to_uppercase()
                })
                .unwrap_or_default();

            for cap in description_pattern.captures_iter(desc) {
                let var_name = cap[1].to_uppercase();
                if var_name == own_name || var_name.len() < 3 {
                    continue;
                }
                if !found_vars.insert(var_name.clone()) {
                    continue;
                }
                env_vars.push(EnvVarSpec {
                    name: var_name,
                    description: Some(desc.clone()),
                    option_mapped: opt.long_flags.first().cloned(),
                    default_value: extract_env_default_value(desc),
                });
            }
        }
    }
    env_vars
}
/// Extracts a default value mentioned alongside an environment variable.
///
/// Patterns are matched case-insensitively, first match wins:
///
/// 1. `(default: VALUE)` / `(default VALUE)` — up to the closing parenthesis,
/// 2. `default: VALUE` / `default VALUE` — a single token,
/// 3. `default to VALUE` / `defaults to VALUE` — a single token.
///
/// The value is returned with its original casing. The earlier code matched
/// against a lowercased copy of the text and returned the lowercased capture,
/// which corrupted case-sensitive defaults such as paths. The parenthesised
/// form is tried first because the generic form also matches inside it and
/// would otherwise stop at the first word.
fn extract_env_default_value(text: &str) -> Option<String> {
    const PATTERNS: [&str; 3] = [
        r"(?i)\(default[:\s]+([^)]+)\)",
        r"(?i)default[:\s]+([^\s,;)]+)",
        r"(?i)defaults?\s+to\s+([^\s,;)]+)",
    ];

    PATTERNS.iter().find_map(|pattern| {
        let re = regex::Regex::new(pattern).expect("default-value patterns are valid regexes");
        re.captures(text)
            .and_then(|cap| cap.get(1))
            .map(|m| m.as_str().trim().to_string())
    })
}
pub fn parse_validation_rules(
_stdout: &str,
_stderr: &str,
options: &[OptionSpec],
arguments: &[ArgumentSpec],
) -> Vec<ValidationRule> {
let mut rules = Vec::new();
for opt in options {
if let Some(desc) = &opt.description {
if let Some(rule) = extract_validation_rule_from_description(
desc,
&opt.long_flags.first().unwrap_or(&String::new()).clone(),
) {
rules.push(rule);
}
}
}
for arg in arguments {
if let Some(desc) = &arg.description {
if let Some(rule) = extract_validation_rule_from_description(desc, &arg.name) {
rules.push(rule);
}
}
if arg.required {
rules.push(ValidationRule {
target: arg.name.clone(),
rule_type: ValidationType::Required,
pattern: None,
min: None,
max: None,
message: Some(format!("{} is required", arg.name)),
});
}
if let Some(arg_type) = &arg.arg_type {
match arg_type {
ArgumentType::Email => {
rules.push(ValidationRule {
target: arg.name.clone(),
rule_type: ValidationType::Format,
pattern: Some(
r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$".to_string(),
),
min: None,
max: None,
message: Some("Must be a valid email address".to_string()),
});
}
ArgumentType::Url => {
rules.push(ValidationRule {
target: arg.name.clone(),
rule_type: ValidationType::Format,
pattern: Some(r"^https?://.+".to_string()),
min: None,
max: None,
message: Some("Must be a valid URL".to_string()),
});
}
ArgumentType::Number => {
rules.push(ValidationRule {
target: arg.name.clone(),
rule_type: ValidationType::Pattern,
pattern: Some(r"^\d+(\.\d+)?$".to_string()),
min: None,
max: None,
message: Some("Must be a number".to_string()),
});
}
_ => {}
}
}
}
for opt in options {
if !opt.choices.is_empty() {
let target = opt.long_flags.first().unwrap_or(&String::new()).clone();
rules.push(ValidationRule {
target: target.clone(),
rule_type: ValidationType::Choice,
pattern: None,
min: None,
max: None,
message: Some(format!("Must be one of: {}", opt.choices.join(", "))),
});
}
}
rules
}
fn extract_validation_rule_from_description(desc: &str, target: &str) -> Option<ValidationRule> {
let desc_lower = desc.to_lowercase();
let range_pattern = regex::Regex::new(
r"(?:between|from|range|must be)\s+(\d+(?:\.\d+)?)\s*(?:-|to)\s*(\d+(?:\.\d+)?)",
)
.unwrap();
if let Some(cap) = range_pattern.captures(&desc_lower) {
if let (Some(min_str), Some(max_str)) = (cap.get(1), cap.get(2)) {
if let (Ok(min), Ok(max)) = (
min_str.as_str().parse::<f64>(),
max_str.as_str().parse::<f64>(),
) {
return Some(ValidationRule {
target: target.to_string(),
rule_type: ValidationType::Range,
pattern: None,
min: Some(min),
max: Some(max),
message: Some(format!("Must be between {} and {}", min, max)),
});
}
}
}
let min_pattern = regex::Regex::new(r"(?:minimum|min|at least|>=)\s+(\d+(?:\.\d+)?)").unwrap();
let max_pattern = regex::Regex::new(r"(?:maximum|max|at most|<=)\s+(\d+(?:\.\d+)?)").unwrap();
let min = min_pattern
.captures(&desc_lower)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().parse::<f64>().ok());
let max = max_pattern
.captures(&desc_lower)
.and_then(|c| c.get(1))
.and_then(|m| m.as_str().parse::<f64>().ok());
if min.is_some() || max.is_some() {
return Some(ValidationRule {
target: target.to_string(),
rule_type: ValidationType::Range,
pattern: None,
min,
max,
message: Some(desc.to_string()),
});
}
if desc_lower.contains("valid email") || desc_lower.contains("email address") {
return Some(ValidationRule {
target: target.to_string(),
rule_type: ValidationType::Format,
pattern: Some(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$".to_string()),
min: None,
max: None,
message: Some("Must be a valid email address".to_string()),
});
}
if desc_lower.contains("valid url") || desc_lower.contains("url") {
return Some(ValidationRule {
target: target.to_string(),
rule_type: ValidationType::Format,
pattern: Some(r"^https?://.+".to_string()),
min: None,
max: None,
message: Some("Must be a valid URL".to_string()),
});
}
let pattern_mention =
regex::Regex::new(r"(?:pattern|regex|match|format)\s*[:=]\s*([^\s,;)]+)").unwrap();
if let Some(cap) = pattern_mention.captures(desc) {
if let Some(pattern) = cap.get(1) {
return Some(ValidationRule {
target: target.to_string(),
rule_type: ValidationType::Pattern,
pattern: Some(pattern.as_str().to_string()),
min: None,
max: None,
message: Some(desc.to_string()),
});
}
}
None
}