1use std::collections::BTreeMap;
18
19use crate::workflow::{VarType, Variable};
20
21#[derive(Debug, Clone)]
23pub struct ParameterSuggestion {
24 pub original_value: String,
26 pub suggested_name: String,
28 pub description: String,
30 pub category: DetectedCategory,
32 pub confidence: f64,
34}
35
/// Kind of parameterizable value a detector recognised.
///
/// The enum is fieldless, so it is `Copy`: categories can be passed and
/// compared by value without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum DetectedCategory {
    /// An `http://` or `https://` URL.
    Url,
    /// An absolute, home-relative or temporary file-system path.
    FilePath,
    /// A credential such as an API key or access token.
    ApiKey,
    /// An email address.
    Email,
    /// A TCP/UDP port number.
    Port,
    /// A bare domain name.
    Domain,
    /// A Git repository reference (for example an SSH remote).
    GitRepo,
    /// A Docker image reference.
    DockerImage,
    /// An IPv4 address.
    IpAddress,
    /// A database connection URL.
    DatabaseUrl,
    /// A reference to an environment variable.
    EnvVar,
    /// A value tied to the current user or machine (for example the home directory).
    UserSpecific,
}
53
54impl DetectedCategory {
55 pub fn label(&self) -> &'static str {
56 match self {
57 Self::Url => "URL",
58 Self::FilePath => "File path",
59 Self::ApiKey => "API key/token",
60 Self::Email => "Email",
61 Self::Port => "Port",
62 Self::Domain => "Domain",
63 Self::GitRepo => "Git repository",
64 Self::DockerImage => "Docker image",
65 Self::IpAddress => "IP address",
66 Self::DatabaseUrl => "Database URL",
67 Self::EnvVar => "Environment variable",
68 Self::UserSpecific => "User-specific value",
69 }
70 }
71}
72
73pub fn detect_parameterizable_values(texts: &[&str]) -> Vec<ParameterSuggestion> {
78 let mut suggestions = Vec::new();
79 let mut seen_values: BTreeMap<String, String> = BTreeMap::new(); for text in texts {
82 detect_urls(text, &mut suggestions, &mut seen_values);
83 detect_file_paths(text, &mut suggestions, &mut seen_values);
84 detect_api_keys(text, &mut suggestions, &mut seen_values);
85 detect_emails(text, &mut suggestions, &mut seen_values);
86 detect_ports(text, &mut suggestions, &mut seen_values);
87 detect_ip_addresses(text, &mut suggestions, &mut seen_values);
88 detect_database_urls(text, &mut suggestions, &mut seen_values);
89 detect_docker_images(text, &mut suggestions, &mut seen_values);
90 detect_git_repos(text, &mut suggestions, &mut seen_values);
91 detect_user_specific(text, &mut suggestions, &mut seen_values);
92 }
93
94 suggestions.sort_by(|a, b| b.confidence.partial_cmp(&a.confidence).unwrap());
96 let mut deduped = Vec::new();
97 let mut seen = std::collections::HashSet::new();
98 for s in suggestions {
99 if seen.insert(s.original_value.clone()) {
100 deduped.push(s);
101 }
102 }
103 deduped
104}
105
106pub fn suggestions_to_variables(suggestions: &[ParameterSuggestion]) -> Vec<Variable> {
108 let mut vars = Vec::new();
109 let mut used_names = std::collections::HashSet::new();
110
111 for s in suggestions {
112 let name = if used_names.contains(&s.suggested_name) {
113 let mut n = s.suggested_name.clone();
115 let mut i = 2;
116 while used_names.contains(&n) {
117 n = format!("{}_{}", s.suggested_name, i);
118 i += 1;
119 }
120 n
121 } else {
122 s.suggested_name.clone()
123 };
124 used_names.insert(name.clone());
125
126 vars.push(Variable {
127 name,
128 var_type: VarType::String,
129 required: s.category != DetectedCategory::Port,
130 default_value: Some(s.original_value.clone()),
131 description: s.description.clone(),
132 });
133 }
134 vars
135}
136
137pub fn apply_parameterization(text: &str, suggestions: &[ParameterSuggestion]) -> String {
139 let mut result = text.to_string();
140 let mut sorted: Vec<_> = suggestions.iter().collect();
142 sorted.sort_by_key(|s| std::cmp::Reverse(s.original_value.len()));
143
144 for s in sorted {
145 result = result.replace(&s.original_value, &format!("{{{{{}}}}}", s.suggested_name));
146 }
147 result
148}
149
150pub fn format_suggestions_display(suggestions: &[ParameterSuggestion]) -> String {
152 if suggestions.is_empty() {
153 return String::from(" No parameterizable values detected.");
154 }
155
156 let mut out = String::new();
157 for (i, s) in suggestions.iter().enumerate() {
158 out.push_str(&format!(
159 " {}. [{}] \"{}\" → {{{{{}}}}}\n",
160 i + 1,
161 s.category.label(),
162 truncate_display(&s.original_value, 50),
163 s.suggested_name,
164 ));
165 out.push_str(&format!(" {}\n", s.description));
166 }
167 out
168}
169
/// Shortens `s` to at most `max` characters for display, appending `…` when
/// anything was cut off.
///
/// The limit is counted in `char`s and the cut is always made on a character
/// boundary, so multi-byte UTF-8 input never causes a slicing panic.
fn truncate_display(s: &str, max: usize) -> String {
    // `nth(max)` is the first character that does not fit; its byte offset is
    // where the kept prefix ends. `None` means the whole string fits.
    match s.char_indices().nth(max) {
        None => s.to_string(),
        Some((cut, _)) => format!("{}…", &s[..cut]),
    }
}
177
178fn add_suggestion(
181 suggestions: &mut Vec<ParameterSuggestion>,
182 seen: &mut BTreeMap<String, String>,
183 value: &str,
184 name: &str,
185 desc: &str,
186 category: DetectedCategory,
187 confidence: f64,
188) {
189 if seen.contains_key(value) {
190 return;
191 }
192 seen.insert(value.to_string(), name.to_string());
193 suggestions.push(ParameterSuggestion {
194 original_value: value.to_string(),
195 suggested_name: name.to_string(),
196 description: desc.to_string(),
197 category,
198 confidence,
199 });
200}
201
202fn detect_urls(
204 text: &str,
205 suggestions: &mut Vec<ParameterSuggestion>,
206 seen: &mut BTreeMap<String, String>,
207) {
208 let mut i = 0;
210 let bytes = text.as_bytes();
211 while i < bytes.len() {
212 if text[i..].starts_with("http://") || text[i..].starts_with("https://") {
213 let start = i;
214 while i < bytes.len() && !b" \t\n\r\"'`,;)}>]".contains(&bytes[i]) {
216 i += 1;
217 }
218 let url = &text[start..i];
219
220 if url.contains("github.com/rust-lang")
222 || url.contains("docs.rs")
223 || url.contains("crates.io")
224 || url.len() < 12
225 {
226 continue;
227 }
228
229 let name = classify_url(url);
230 let desc = format!("{} detected in workflow", url_category_desc(&name));
231 add_suggestion(
232 suggestions,
233 seen,
234 url,
235 &name,
236 &desc,
237 DetectedCategory::Url,
238 0.9,
239 );
240 } else {
241 i += 1;
242 }
243 }
244}
245
/// Picks a variable name for `url` based on substrings of its lowercased form.
///
/// Rules are tried in order and the first one with a matching marker wins;
/// a URL matching none of them is named `base_url`.
fn classify_url(url: &str) -> String {
    // (markers, variable name) — order matters, earlier rules take priority.
    const RULES: [(&[&str], &str); 5] = [
        (&["/api/", "/v1/", "/v2/", "/graphql"], "api_url"),
        (&["localhost", "127.0.0.1"], "local_url"),
        (&[".git", "github.com", "gitlab.com"], "repo_url"),
        (&["docker", "registry"], "registry_url"),
        (&["database", "postgres", "mysql", "mongo"], "db_url"),
    ];

    let lowered = url.to_lowercase();
    RULES
        .iter()
        .find(|(markers, _)| markers.iter().any(|marker| lowered.contains(*marker)))
        .map_or("base_url", |&(_, name)| name)
        .to_string()
}
272
/// Maps a URL variable name (as produced by `classify_url`) to its
/// human-readable description; unknown names are described as "Base URL".
fn url_category_desc(name: &str) -> &str {
    const DESCRIPTIONS: [(&str, &str); 5] = [
        ("api_url", "API endpoint URL"),
        ("local_url", "Local development URL"),
        ("repo_url", "Git repository URL"),
        ("registry_url", "Container registry URL"),
        ("db_url", "Database connection URL"),
    ];

    DESCRIPTIONS
        .iter()
        .find(|(key, _)| *key == name)
        .map_or("Base URL", |&(_, text)| text)
}
283
284fn detect_file_paths(
286 text: &str,
287 suggestions: &mut Vec<ParameterSuggestion>,
288 seen: &mut BTreeMap<String, String>,
289) {
290 for word in text.split_whitespace() {
292 let word = word.trim_matches(|c: char| c == '"' || c == '\'' || c == ',' || c == ';');
293
294 if word.starts_with("/Users/") || word.starts_with("/home/") {
295 let parts: Vec<&str> = word.split('/').collect();
297 if parts.len() >= 4 {
298 let name = if word.contains("/Projects/")
299 || word.contains("/project")
300 || word.contains("/src/")
301 {
302 "project_dir"
303 } else if word.contains("/output")
304 || word.contains("/dist/")
305 || word.contains("/build/")
306 {
307 "output_dir"
308 } else {
309 "target_path"
310 };
311 add_suggestion(
312 suggestions,
313 seen,
314 word,
315 name,
316 "Absolute file path (user-specific, should be parameterized)",
317 DetectedCategory::FilePath,
318 0.95,
319 );
320 }
321 } else if word.starts_with("~/") && word.len() > 3 {
322 add_suggestion(
323 suggestions,
324 seen,
325 word,
326 "target_path",
327 "Home-relative path (may differ across machines)",
328 DetectedCategory::FilePath,
329 0.7,
330 );
331 } else if word.starts_with("/tmp/") || word.starts_with("/var/") {
332 add_suggestion(
333 suggestions,
334 seen,
335 word,
336 "temp_path",
337 "Temporary/system path",
338 DetectedCategory::FilePath,
339 0.6,
340 );
341 }
342 }
343}
344
345fn detect_api_keys(
347 text: &str,
348 suggestions: &mut Vec<ParameterSuggestion>,
349 seen: &mut BTreeMap<String, String>,
350) {
351 let key_prefixes = [
353 ("sk-", "api_key", "API secret key"),
354 ("sk_live_", "stripe_key", "Stripe live API key"),
355 ("sk_test_", "stripe_test_key", "Stripe test API key"),
356 ("pk_live_", "stripe_pub_key", "Stripe publishable key"),
357 ("ghp_", "github_token", "GitHub personal access token"),
358 ("gho_", "github_oauth_token", "GitHub OAuth token"),
359 ("ghs_", "github_server_token", "GitHub server token"),
360 ("glpat-", "gitlab_token", "GitLab personal access token"),
361 ("xoxb-", "slack_bot_token", "Slack bot token"),
362 ("xoxp-", "slack_user_token", "Slack user token"),
363 ("AKIA", "aws_access_key", "AWS access key ID"),
364 ("Bearer ", "auth_token", "Bearer authentication token"),
365 ("token ", "auth_token", "Authentication token"),
366 ];
367
368 for token in text.split_whitespace() {
369 let token = token.trim_matches(|c: char| c == '"' || c == '\'' || c == ',' || c == ';');
370 let word = if let Some(pos) = token.find('=') {
372 &token[pos + 1..]
373 } else {
374 token
375 };
376 for (prefix, name, desc) in &key_prefixes {
377 if word.starts_with(prefix) && word.len() > prefix.len() + 4 {
378 add_suggestion(
379 suggestions,
380 seen,
381 word,
382 name,
383 desc,
384 DetectedCategory::ApiKey,
385 1.0,
386 );
387 break;
388 }
389 }
390
391 if word.starts_with('$') && word.len() > 2 {
393 let var_name = word.trim_start_matches('$');
394 let lower = var_name.to_lowercase();
395 if lower.contains("key")
396 || lower.contains("token")
397 || lower.contains("secret")
398 || lower.contains("password")
399 || lower.contains("api")
400 {
401 let suggested = lower.replace('-', "_");
402 add_suggestion(
403 suggestions,
404 seen,
405 word,
406 &suggested,
407 &format!("Environment variable reference: {}", var_name),
408 DetectedCategory::EnvVar,
409 0.8,
410 );
411 }
412 }
413 }
414
415 for word in text.split_whitespace() {
417 let word = word.trim_matches(|c: char| c == '"' || c == '\'' || c == ',' || c == ';');
418 if word.len() >= 32
419 && word.chars().all(|c| c.is_ascii_hexdigit())
420 && !seen.contains_key(word)
421 {
422 add_suggestion(
423 suggestions,
424 seen,
425 word,
426 "auth_token",
427 "Long hex string (likely a token or hash)",
428 DetectedCategory::ApiKey,
429 0.7,
430 );
431 }
432 }
433}
434
435fn detect_emails(
437 text: &str,
438 suggestions: &mut Vec<ParameterSuggestion>,
439 seen: &mut BTreeMap<String, String>,
440) {
441 for word in text.split_whitespace() {
442 let word = word.trim_matches(|c: char| {
443 c == '"' || c == '\'' || c == ',' || c == ';' || c == '<' || c == '>'
444 });
445 if word.starts_with("git@") {
447 continue;
448 }
449 if word.contains('@') && word.contains('.') && word.len() > 5 {
450 let parts: Vec<&str> = word.split('@').collect();
452 if parts.len() == 2 && !parts[0].is_empty() && parts[1].contains('.') {
453 add_suggestion(
454 suggestions,
455 seen,
456 word,
457 "email",
458 "Email address",
459 DetectedCategory::Email,
460 0.85,
461 );
462 }
463 }
464 }
465}
466
467fn detect_ports(
469 text: &str,
470 suggestions: &mut Vec<ParameterSuggestion>,
471 seen: &mut BTreeMap<String, String>,
472) {
473 let mut i = 0;
475 let chars: Vec<char> = text.chars().collect();
476 while i < chars.len() {
477 if chars[i] == ':' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit() {
478 let start = i + 1;
479 let mut end = start;
480 while end < chars.len() && chars[end].is_ascii_digit() {
481 end += 1;
482 }
483 let port_str: String = chars[start..end].iter().collect();
484 if let Ok(port) = port_str.parse::<u16>()
485 && (1024..=65535).contains(&port)
486 && !seen.contains_key(&port_str)
487 {
488 let before: String = chars[..i]
490 .iter()
491 .rev()
492 .take(20)
493 .collect::<String>()
494 .chars()
495 .rev()
496 .collect();
497 if before.contains("localhost")
498 || before.contains("0.0.0.0")
499 || before.contains("127.0.0.1")
500 || before.ends_with("://")
501 || before
502 .chars()
503 .last()
504 .is_some_and(|c| c.is_alphanumeric() || c == '.')
505 {
506 add_suggestion(
507 suggestions,
508 seen,
509 &port_str,
510 "port",
511 &format!("Port number ({})", port),
512 DetectedCategory::Port,
513 0.6,
514 );
515 }
516 }
517 i = end;
518 } else {
519 i += 1;
520 }
521 }
522}
523
524fn detect_ip_addresses(
526 text: &str,
527 suggestions: &mut Vec<ParameterSuggestion>,
528 seen: &mut BTreeMap<String, String>,
529) {
530 for word in text.split_whitespace() {
532 let word = word.trim_matches(|c: char| !c.is_ascii_digit() && c != '.');
533 let parts: Vec<&str> = word.split('.').collect();
534 if parts.len() == 4 && parts.iter().all(|p| p.parse::<u8>().is_ok()) {
535 if word == "127.0.0.1" || word == "0.0.0.0" {
537 continue;
538 }
539 add_suggestion(
540 suggestions,
541 seen,
542 word,
543 "ip_address",
544 "IP address (environment-specific)",
545 DetectedCategory::IpAddress,
546 0.8,
547 );
548 }
549 }
550}
551
552fn detect_database_urls(
554 text: &str,
555 suggestions: &mut Vec<ParameterSuggestion>,
556 seen: &mut BTreeMap<String, String>,
557) {
558 let db_prefixes = [
559 "postgres://",
560 "postgresql://",
561 "mysql://",
562 "mongodb://",
563 "mongodb+srv://",
564 "redis://",
565 "sqlite://",
566 ];
567 for token in text.split_whitespace() {
568 let token = token.trim_matches(|c: char| c == '"' || c == '\'');
569 let word = if let Some(pos) = token.find('=') {
571 &token[pos + 1..]
572 } else {
573 token
574 };
575 for prefix in &db_prefixes {
576 if word.starts_with(prefix) {
577 add_suggestion(
578 suggestions,
579 seen,
580 word,
581 "database_url",
582 "Database connection URL (contains credentials)",
583 DetectedCategory::DatabaseUrl,
584 1.0,
585 );
586 break;
587 }
588 }
589 }
590}
591
592fn detect_docker_images(
594 text: &str,
595 suggestions: &mut Vec<ParameterSuggestion>,
596 seen: &mut BTreeMap<String, String>,
597) {
598 let docker_indicators = ["docker pull", "docker run", "docker push", "FROM "];
600 for indicator in &docker_indicators {
601 if let Some(pos) = text.find(indicator) {
602 let rest = &text[pos + indicator.len()..];
603 let image: String = rest
604 .trim_start()
605 .chars()
606 .take_while(|c| {
607 c.is_alphanumeric()
608 || *c == '/'
609 || *c == ':'
610 || *c == '.'
611 || *c == '-'
612 || *c == '_'
613 })
614 .collect();
615 if !image.is_empty() && image.len() > 3 {
616 add_suggestion(
617 suggestions,
618 seen,
619 &image,
620 "docker_image",
621 "Docker image reference",
622 DetectedCategory::DockerImage,
623 0.85,
624 );
625 }
626 }
627 }
628}
629
630fn detect_git_repos(
632 text: &str,
633 suggestions: &mut Vec<ParameterSuggestion>,
634 seen: &mut BTreeMap<String, String>,
635) {
636 for word in text.split_whitespace() {
638 let word = word.trim_matches(|c: char| c == '"' || c == '\'');
639 if word.starts_with("git@") && word.contains(':') && word.contains('/') {
640 add_suggestion(
641 suggestions,
642 seen,
643 word,
644 "repo_url",
645 "Git SSH repository URL",
646 DetectedCategory::GitRepo,
647 0.9,
648 );
649 }
650 }
651}
652
653fn detect_user_specific(
655 text: &str,
656 suggestions: &mut Vec<ParameterSuggestion>,
657 seen: &mut BTreeMap<String, String>,
658) {
659 if let Some(home) = dirs::home_dir() {
661 let home_str = home.to_string_lossy().to_string();
662 if text.contains(&home_str) && !seen.contains_key(&home_str) {
663 add_suggestion(
664 suggestions,
665 seen,
666 &home_str,
667 "home_dir",
668 "User home directory (machine-specific)",
669 DetectedCategory::UserSpecific,
670 0.95,
671 );
672 }
673 }
674
675 if let Ok(user) = std::env::var("USER")
677 && user.len() >= 3
678 {
679 let user_in_path = format!("/Users/{}", user);
680 let user_in_home = format!("/home/{}", user);
681 for pattern in [&user_in_path, &user_in_home] {
682 if text.contains(pattern.as_str()) && !seen.contains_key(pattern.as_str()) {
683 }
685 }
686 }
687}
688
689pub fn scan_workflow(workflow: &crate::workflow::Workflow) -> Vec<ParameterSuggestion> {
695 let content_text = workflow.base.content.as_text();
696 let mut texts: Vec<&str> = Vec::new();
697
698 texts.push(workflow.base.description.as_str());
699 texts.push(content_text.as_ref());
700
701 for step in &workflow.steps {
702 texts.push(step.description.as_str());
703 if let Some(ref cmd) = step.command {
704 texts.push(cmd.as_str());
705 }
706 }
707
708 detect_parameterizable_values(&texts)
709}
710
711pub fn parameterize_workflow(
713 workflow: &mut crate::workflow::Workflow,
714 suggestions: &[ParameterSuggestion],
715) {
716 if suggestions.is_empty() {
717 return;
718 }
719
720 workflow.base.description = apply_parameterization(&workflow.base.description, suggestions);
722
723 let new_content = apply_parameterization(&workflow.base.content.as_text(), suggestions);
725 workflow.base.content = crate::pattern::Content::Plain(new_content);
726
727 for step in &mut workflow.steps {
729 step.description = apply_parameterization(&step.description, suggestions);
730 if let Some(ref cmd) = step.command {
731 step.command = Some(apply_parameterization(cmd, suggestions));
732 }
733 }
734
735 let existing_names: std::collections::HashSet<String> =
737 workflow.variables.iter().map(|v| v.name.clone()).collect();
738 let new_vars = suggestions_to_variables(suggestions);
739 for var in new_vars {
740 if !existing_names.contains(&var.name) {
741 workflow.variables.push(var);
742 }
743 }
744}
745
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs all detectors over a single piece of text.
    fn detect(text: &str) -> Vec<ParameterSuggestion> {
        detect_parameterizable_values(&[text])
    }

    /// True when at least one suggestion has the wanted category.
    fn has_category(found: &[ParameterSuggestion], wanted: DetectedCategory) -> bool {
        found.iter().any(|s| s.category == wanted)
    }

    /// Number of URL suggestions produced for `texts`.
    fn url_count(texts: &[&str]) -> usize {
        detect_parameterizable_values(texts)
            .iter()
            .filter(|s| s.category == DetectedCategory::Url)
            .count()
    }

    /// A hand-built URL suggestion for the formatting/replacement tests.
    fn api_url_suggestion(description: &str) -> ParameterSuggestion {
        ParameterSuggestion {
            original_value: "https://api.example.com".to_string(),
            suggested_name: "api_url".to_string(),
            description: description.to_string(),
            category: DetectedCategory::Url,
            confidence: 0.9,
        }
    }

    #[test]
    fn test_detect_urls() {
        let suggestions = detect("Deploy to https://api.example.com/v1/deploy");
        assert!(!suggestions.is_empty());
        assert_eq!(suggestions[0].suggested_name, "api_url");
        assert_eq!(suggestions[0].category, DetectedCategory::Url);
    }

    #[test]
    fn test_detect_file_paths() {
        let suggestions = detect("Run build in /Users/david/Projects/myapp");
        assert!(has_category(&suggestions, DetectedCategory::FilePath));
    }

    #[test]
    fn test_detect_api_keys() {
        let suggestions = detect("Use key sk-1234567890abcdef to authenticate");
        let key = suggestions
            .iter()
            .find(|s| s.category == DetectedCategory::ApiKey)
            .expect("an API key suggestion should be produced");
        assert_eq!(key.suggested_name, "api_key");
    }

    #[test]
    fn test_detect_github_token() {
        let suggestions = detect("export GITHUB_TOKEN=ghp_abcdefghijklmnopqrstuvwxyz012345");
        assert!(suggestions
            .iter()
            .any(|s| s.suggested_name == "github_token"));
    }

    #[test]
    fn test_detect_email() {
        let suggestions = detect("Send notification to admin@company.com");
        assert!(has_category(&suggestions, DetectedCategory::Email));
    }

    #[test]
    fn test_detect_database_url() {
        let suggestions = detect("DATABASE_URL=postgres://user:pass@db.example.com:5432/mydb");
        assert!(has_category(&suggestions, DetectedCategory::DatabaseUrl));
    }

    #[test]
    fn test_detect_git_ssh() {
        let suggestions = detect("git clone git@github.com:user/repo.git");
        assert!(has_category(&suggestions, DetectedCategory::GitRepo));
    }

    #[test]
    fn test_apply_parameterization() {
        let suggestions = vec![api_url_suggestion("API URL")];
        let result = apply_parameterization("Deploy to https://api.example.com/v1", &suggestions);
        assert_eq!(result, "Deploy to {{api_url}}/v1");
    }

    #[test]
    fn test_no_false_positives_on_normal_text() {
        assert!(detect("Run cargo build and then cargo test").is_empty());
    }

    #[test]
    fn test_deduplication() {
        // The same URL in two texts must be suggested exactly once.
        assert_eq!(
            url_count(&[
                "Deploy to https://api.example.com",
                "Also check https://api.example.com",
            ]),
            1
        );
        // Different URLs are different values and are both kept.
        assert_eq!(
            url_count(&[
                "Deploy to https://api.example.com",
                "Also check https://api.example.com/health",
            ]),
            2
        );
    }

    #[test]
    fn test_format_display() {
        let suggestions = vec![api_url_suggestion("API endpoint URL")];
        let display = format_suggestions_display(&suggestions);
        assert!(display.contains("api_url"));
        assert!(display.contains("URL"));
    }
}