1use crate::map::types::OpCode;
23use regex::Regex;
24use scraper::{Html, Selector};
25use serde::{Deserialize, Serialize};
26use std::sync::OnceLock;
27
28const PLATFORM_ACTIONS_JSON: &str = include_str!("platform_actions.json");
32
33#[derive(Debug, Clone, Serialize, Deserialize)]
37pub struct FormField {
38 pub name: String,
40 pub field_type: String,
42 pub value: Option<String>,
44 pub required: bool,
46}
47
48#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct HttpAction {
51 pub opcode: OpCode,
53 pub label: String,
55 pub source: ActionSource,
57 pub confidence: f32,
59}
60
61#[derive(Debug, Clone, Serialize, Deserialize)]
63pub enum ActionSource {
64 Form {
66 action_url: String,
68 method: String,
70 content_type: String,
72 fields: Vec<FormField>,
74 },
75 Api {
77 url: String,
79 method: String,
81 body_template: Option<String>,
83 },
84 Platform {
86 platform: String,
88 action_type: String,
90 },
91}
92
/// Site platform recognised by [`detect_platform`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DetectedPlatform {
    /// Registry key `"shopify"`.
    Shopify,
    /// Registry key `"woocommerce"`.
    WooCommerce,
    /// Registry key `"magento"`.
    Magento,
    /// Registry key `"bigcommerce"`.
    BigCommerce,
    /// Registry key `"squarespace"`.
    Squarespace,
    /// Registry key `"wix"`.
    Wix,
    /// Registry key `"prestashop"`.
    PrestaShop,
    /// Registry key `"opencart"`.
    OpenCart,
    /// Registry key `"nextjs_commerce"`.
    NextJsCommerce,
    /// Registry key `"wordpress"`.
    WordPress,
    /// Registry key `"drupal"`.
    Drupal,
    /// No registered platform indicator matched.
    Unknown,
}
121
122#[derive(Debug, Deserialize)]
125struct PlatformConfig {
126 indicators: PlatformIndicators,
127 actions: Vec<PlatformActionTemplate>,
128}
129
130#[derive(Debug, Deserialize)]
131struct PlatformIndicators {
132 js_patterns: Vec<String>,
133 html_patterns: Vec<String>,
134}
135
136#[derive(Debug, Deserialize)]
137struct PlatformActionTemplate {
138 label: String,
139 opcode: PlatformOpCode,
140 action_type: String,
141 #[allow(dead_code)]
142 url_template: String,
143 #[allow(dead_code)]
144 method: String,
145 confidence: f32,
146}
147
148#[derive(Debug, Deserialize)]
149struct PlatformOpCode {
150 category: u8,
151 action: u8,
152}
153
154type PlatformRegistry = std::collections::HashMap<String, PlatformConfig>;
155
156fn platform_registry() -> &'static PlatformRegistry {
158 static REGISTRY: OnceLock<PlatformRegistry> = OnceLock::new();
159 REGISTRY.get_or_init(|| serde_json::from_str(PLATFORM_ACTIONS_JSON).unwrap_or_default())
160}
161
162pub fn discover_actions_from_html(html: &str, base_url: &str) -> Vec<HttpAction> {
180 let document = Html::parse_document(html);
181 let mut actions = Vec::new();
182
183 let form_sel = match Selector::parse("form") {
185 Ok(s) => s,
186 Err(_) => return actions,
187 };
188 let field_sel = Selector::parse("input, select, textarea").expect("field selector is valid");
189 let button_sel =
190 Selector::parse("button, input[type=\"submit\"]").expect("button selector is valid");
191
192 for form in document.select(&form_sel) {
193 let action_raw = form.value().attr("action").unwrap_or("");
194 let action_url = resolve_url(base_url, action_raw);
195 let method = form.value().attr("method").unwrap_or("GET").to_uppercase();
196 let enctype = form
197 .value()
198 .attr("enctype")
199 .unwrap_or("application/x-www-form-urlencoded")
200 .to_string();
201
202 let mut fields = Vec::new();
203 for field in form.select(&field_sel) {
204 let name = field.value().attr("name").unwrap_or("").to_string();
205 if name.is_empty() {
206 continue;
207 }
208 let field_type = field
209 .value()
210 .attr("type")
211 .unwrap_or(field.value().name())
212 .to_string();
213 let value = field.value().attr("value").map(String::from);
214 let required = field.value().attr("required").is_some();
215
216 fields.push(FormField {
217 name,
218 field_type,
219 value,
220 required,
221 });
222 }
223
224 let submit_label = form
226 .select(&button_sel)
227 .next()
228 .map(|el| {
229 el.value()
230 .attr("value")
231 .map(String::from)
232 .unwrap_or_else(|| element_text(&el))
233 })
234 .filter(|s| !s.is_empty());
235
236 let label = submit_label.unwrap_or_else(|| format!("Form \u{2192} {action_url}"));
237 let opcode = classify_form_opcode(&label, &action_url);
238
239 actions.push(HttpAction {
240 opcode,
241 label,
242 source: ActionSource::Form {
243 action_url,
244 method,
245 content_type: enctype,
246 fields,
247 },
248 confidence: 0.90,
249 });
250 }
251
252 actions
253}
254
255pub fn discover_actions_from_js(js_source: &str, base_url: &str) -> Vec<HttpAction> {
269 let mut actions = Vec::new();
270 let mut seen_urls: std::collections::HashSet<String> = std::collections::HashSet::new();
271
272 let fetch_re =
274 Regex::new(r#"fetch\(\s*['"]([^'"]+)['"]\s*,\s*\{[^}]*method\s*:\s*['"](\w+)['"]"#)
275 .expect("fetch regex is valid");
276
277 for caps in fetch_re.captures_iter(js_source) {
278 let url_raw = caps.get(1).map_or("", |m| m.as_str());
279 let method = caps.get(2).map_or("GET", |m| m.as_str()).to_uppercase();
280 let url = resolve_url(base_url, url_raw);
281 if seen_urls.insert(format!("{method}:{url}")) {
282 actions.push(HttpAction {
283 opcode: classify_api_opcode(&url, &method),
284 label: format!("{method} {url_raw}"),
285 source: ActionSource::Api {
286 url,
287 method,
288 body_template: None,
289 },
290 confidence: 0.80,
291 });
292 }
293 }
294
295 let fetch_simple_re =
297 Regex::new(r#"fetch\(\s*['"]([^'"]+)['"]\s*\)"#).expect("fetch simple regex is valid");
298
299 for caps in fetch_simple_re.captures_iter(js_source) {
300 let url_raw = caps.get(1).map_or("", |m| m.as_str());
301 let method = "GET".to_string();
302 let url = resolve_url(base_url, url_raw);
303 if seen_urls.insert(format!("{method}:{url}")) {
304 actions.push(HttpAction {
305 opcode: classify_api_opcode(&url, &method),
306 label: format!("GET {url_raw}"),
307 source: ActionSource::Api {
308 url,
309 method,
310 body_template: None,
311 },
312 confidence: 0.70,
313 });
314 }
315 }
316
317 let axios_re = Regex::new(r#"axios\.(get|post|put|delete|patch)\(\s*['"]([^'"]+)['"]"#)
319 .expect("axios regex is valid");
320
321 for caps in axios_re.captures_iter(js_source) {
322 let method = caps.get(1).map_or("GET", |m| m.as_str()).to_uppercase();
323 let url_raw = caps.get(2).map_or("", |m| m.as_str());
324 let url = resolve_url(base_url, url_raw);
325 if seen_urls.insert(format!("{method}:{url}")) {
326 actions.push(HttpAction {
327 opcode: classify_api_opcode(&url, &method),
328 label: format!("{method} {url_raw}"),
329 source: ActionSource::Api {
330 url,
331 method,
332 body_template: None,
333 },
334 confidence: 0.80,
335 });
336 }
337 }
338
339 let ajax_block_re =
341 Regex::new(r#"\$\.ajax\(\s*\{([^}]*)\}"#).expect("ajax block regex is valid");
342 let inner_url_re =
343 Regex::new(r#"url\s*:\s*['"]([^'"]+)['"]"#).expect("inner url regex is valid");
344 let inner_type_re =
345 Regex::new(r#"type\s*:\s*['"](\w+)['"]"#).expect("inner type regex is valid");
346
347 for caps in ajax_block_re.captures_iter(js_source) {
348 let block = caps.get(1).map_or("", |m| m.as_str());
349 if let Some(url_caps) = inner_url_re.captures(block) {
350 let url_raw = url_caps.get(1).map_or("", |m| m.as_str());
351 let method = inner_type_re
352 .captures(block)
353 .and_then(|c| c.get(1))
354 .map_or("GET", |m| m.as_str())
355 .to_uppercase();
356 let url = resolve_url(base_url, url_raw);
357 if seen_urls.insert(format!("{method}:{url}")) {
358 actions.push(HttpAction {
359 opcode: classify_api_opcode(&url, &method),
360 label: format!("{method} {url_raw}"),
361 source: ActionSource::Api {
362 url,
363 method,
364 body_template: None,
365 },
366 confidence: 0.75,
367 });
368 }
369 }
370 }
371
372 let api_path_re = Regex::new(r#"['"](/api/[^'"]+)['"]"#).expect("api path regex is valid");
374
375 for caps in api_path_re.captures_iter(js_source) {
376 let url_raw = caps.get(1).map_or("", |m| m.as_str());
377 let url = resolve_url(base_url, url_raw);
378 let method = "GET".to_string();
379 if seen_urls.insert(format!("{method}:{url}")) {
380 actions.push(HttpAction {
381 opcode: classify_api_opcode(&url, &method),
382 label: format!("API {url_raw}"),
383 source: ActionSource::Api {
384 url,
385 method,
386 body_template: None,
387 },
388 confidence: 0.60,
389 });
390 }
391 }
392
393 let graphql_re =
395 Regex::new(r#"['"]([^'"]*(?:/graphql|/gql)[^'"]*)['"]"#).expect("graphql regex is valid");
396
397 for caps in graphql_re.captures_iter(js_source) {
398 let url_raw = caps.get(1).map_or("", |m| m.as_str());
399 if url_raw.is_empty() || url_raw.len() > 200 {
400 continue;
401 }
402 let url = resolve_url(base_url, url_raw);
403 let method = "POST".to_string();
404 if seen_urls.insert(format!("{method}:{url}")) {
405 actions.push(HttpAction {
406 opcode: classify_api_opcode(&url, &method),
407 label: format!("GraphQL {url_raw}"),
408 source: ActionSource::Api {
409 url,
410 method,
411 body_template: Some(r#"{"query":"","variables":{}}"#.to_string()),
412 },
413 confidence: 0.75,
414 });
415 }
416 }
417
418 let xhr_re = Regex::new(r#"\.open\(\s*['"](\w+)['"]\s*,\s*['"]([^'"]+)['"]"#)
420 .expect("xhr regex is valid");
421
422 for caps in xhr_re.captures_iter(js_source) {
423 let method = caps.get(1).map_or("GET", |m| m.as_str()).to_uppercase();
424 let url_raw = caps.get(2).map_or("", |m| m.as_str());
425 if url_raw.is_empty() || url_raw.len() > 200 {
426 continue;
427 }
428 let url = resolve_url(base_url, url_raw);
429 if seen_urls.insert(format!("{method}:{url}")) {
430 actions.push(HttpAction {
431 opcode: classify_api_opcode(&url, &method),
432 label: format!("{method} {url_raw}"),
433 source: ActionSource::Api {
434 url,
435 method,
436 body_template: None,
437 },
438 confidence: 0.70,
439 });
440 }
441 }
442
443 let rest_v_re =
445 Regex::new(r#"['"]([^'"]*?/v[0-9]+/[^'"]+)['"]"#).expect("rest version regex is valid");
446
447 for caps in rest_v_re.captures_iter(js_source) {
448 let url_raw = caps.get(1).map_or("", |m| m.as_str());
449 if url_raw.is_empty() || url_raw.len() > 200 || !url_raw.starts_with('/') {
450 continue;
451 }
452 let url = resolve_url(base_url, url_raw);
453 let method = "GET".to_string();
454 if seen_urls.insert(format!("{method}:{url}")) {
455 actions.push(HttpAction {
456 opcode: classify_api_opcode(&url, &method),
457 label: format!("REST {url_raw}"),
458 source: ActionSource::Api {
459 url,
460 method,
461 body_template: None,
462 },
463 confidence: 0.55,
464 });
465 }
466 }
467
468 actions
469}
470
471pub fn discover_actions_from_platform(_domain: &str, page_html: &str) -> Vec<HttpAction> {
487 let platform = detect_platform(page_html);
488 if platform == DetectedPlatform::Unknown {
489 return Vec::new();
490 }
491
492 let platform_key = match platform {
493 DetectedPlatform::Shopify => "shopify",
494 DetectedPlatform::WooCommerce => "woocommerce",
495 DetectedPlatform::Magento => "magento",
496 DetectedPlatform::BigCommerce => "bigcommerce",
497 DetectedPlatform::Squarespace => "squarespace",
498 DetectedPlatform::Wix => "wix",
499 DetectedPlatform::PrestaShop => "prestashop",
500 DetectedPlatform::OpenCart => "opencart",
501 DetectedPlatform::NextJsCommerce => "nextjs_commerce",
502 DetectedPlatform::WordPress => "wordpress",
503 DetectedPlatform::Drupal => "drupal",
504 DetectedPlatform::Unknown => return Vec::new(),
505 };
506
507 let registry = platform_registry();
508 let config = match registry.get(platform_key) {
509 Some(c) => c,
510 None => return Vec::new(),
511 };
512
513 config
514 .actions
515 .iter()
516 .map(|tmpl| HttpAction {
517 opcode: OpCode::new(tmpl.opcode.category, tmpl.opcode.action),
518 label: tmpl.label.clone(),
519 source: ActionSource::Platform {
520 platform: platform_key.to_string(),
521 action_type: tmpl.action_type.clone(),
522 },
523 confidence: tmpl.confidence,
524 })
525 .collect()
526}
527
528pub fn detect_platform(html: &str) -> DetectedPlatform {
542 let registry = platform_registry();
543
544 let platform_order = [
546 "shopify",
547 "woocommerce",
548 "magento",
549 "bigcommerce",
550 "squarespace",
551 "wix",
552 "prestashop",
553 "opencart",
554 "nextjs_commerce",
555 "drupal",
556 "wordpress",
557 ];
558
559 for &key in &platform_order {
560 if let Some(config) = registry.get(key) {
561 let matched = config
562 .indicators
563 .js_patterns
564 .iter()
565 .any(|pat| html.contains(pat.as_str()))
566 || config
567 .indicators
568 .html_patterns
569 .iter()
570 .any(|pat| html.contains(pat.as_str()));
571
572 if matched {
573 return match key {
574 "shopify" => DetectedPlatform::Shopify,
575 "woocommerce" => DetectedPlatform::WooCommerce,
576 "magento" => DetectedPlatform::Magento,
577 "bigcommerce" => DetectedPlatform::BigCommerce,
578 "squarespace" => DetectedPlatform::Squarespace,
579 "wix" => DetectedPlatform::Wix,
580 "prestashop" => DetectedPlatform::PrestaShop,
581 "opencart" => DetectedPlatform::OpenCart,
582 "nextjs_commerce" => DetectedPlatform::NextJsCommerce,
583 "wordpress" => DetectedPlatform::WordPress,
584 "drupal" => DetectedPlatform::Drupal,
585 _ => DetectedPlatform::Unknown,
586 };
587 }
588 }
589 }
590
591 DetectedPlatform::Unknown
592}
593
594fn element_text(el: &scraper::ElementRef<'_>) -> String {
599 el.text()
600 .collect::<Vec<_>>()
601 .join(" ")
602 .split_whitespace()
603 .collect::<Vec<_>>()
604 .join(" ")
605}
606
607fn resolve_url(base_url: &str, relative: &str) -> String {
609 if relative.is_empty() {
610 return base_url.to_string();
611 }
612 if relative.starts_with("http://") || relative.starts_with("https://") {
613 return relative.to_string();
614 }
615 if let Ok(base) = url::Url::parse(base_url) {
616 if let Ok(resolved) = base.join(relative) {
617 return resolved.to_string();
618 }
619 }
620 relative.to_string()
621}
622
623fn classify_form_opcode(label: &str, action_url: &str) -> OpCode {
625 let label_lower = label.to_lowercase();
626 let url_lower = action_url.to_lowercase();
627
628 if label_lower.contains("add to cart")
629 || label_lower.contains("add to bag")
630 || url_lower.contains("/cart/add")
631 {
632 return OpCode::new(0x02, 0x00); }
634 if label_lower.contains("search") || url_lower.contains("/search") {
635 return OpCode::new(0x00, 0x01); }
637 if label_lower.contains("log in")
638 || label_lower.contains("login")
639 || label_lower.contains("sign in")
640 || url_lower.contains("/login")
641 || url_lower.contains("/signin")
642 {
643 return OpCode::new(0x04, 0x01); }
645 if label_lower.contains("subscribe") || label_lower.contains("newsletter") {
646 return OpCode::new(0x04, 0x03); }
648 if label_lower.contains("register")
649 || label_lower.contains("sign up")
650 || label_lower.contains("signup")
651 {
652 return OpCode::new(0x04, 0x02); }
654
655 OpCode::new(0x04, 0x00)
657}
658
659fn classify_api_opcode(url: &str, method: &str) -> OpCode {
661 let url_lower = url.to_lowercase();
662
663 if url_lower.contains("/cart") || url_lower.contains("/basket") {
664 return match method {
665 "POST" => OpCode::new(0x02, 0x00), "DELETE" => OpCode::new(0x02, 0x01), "PUT" | "PATCH" => OpCode::new(0x02, 0x02), _ => OpCode::new(0x02, 0x06), };
670 }
671 if url_lower.contains("/search") || url_lower.contains("/query") {
672 return OpCode::new(0x00, 0x01); }
674 if url_lower.contains("/auth") || url_lower.contains("/login") || url_lower.contains("/session")
675 {
676 return OpCode::new(0x04, 0x01); }
678
679 OpCode::new(0x06, 0x00) }
682
#[cfg(test)]
mod tests {
    use super::*;

    /// Looks up a form field by its `name`.
    fn field_named<'a>(fields: &'a [FormField], name: &str) -> Option<&'a FormField> {
        fields.iter().find(|f| f.name == name)
    }

    /// Returns `(url, method)` when `action` came from JavaScript discovery.
    fn api_parts(action: &HttpAction) -> Option<(&str, &str)> {
        match &action.source {
            ActionSource::Api { url, method, .. } => Some((url.as_str(), method.as_str())),
            _ => None,
        }
    }

    // A GET search form yields one action with both named inputs captured.
    #[test]
    fn test_discover_form_actions() {
        let html = r#"
            <html><body>
            <form action="/search" method="GET">
                <input type="text" name="q" required />
                <input type="hidden" name="ref" value="nav_search" />
                <button type="submit">Search</button>
            </form>
            </body></html>
        "#;

        let actions = discover_actions_from_html(html, "https://example.com");
        assert_eq!(actions.len(), 1);

        let ActionSource::Form { fields, .. } = &actions[0].source else {
            panic!("expected ActionSource::Form");
        };
        assert_eq!(fields.len(), 2);

        let q_field = field_named(fields, "q").unwrap();
        assert_eq!(q_field.field_type, "text");
        assert!(q_field.required);

        let ref_field = field_named(fields, "ref").unwrap();
        assert_eq!(ref_field.field_type, "hidden");
        assert_eq!(ref_field.value.as_deref(), Some("nav_search"));
    }

    // The action URL is resolved against the base and the method is preserved.
    #[test]
    fn test_discover_form_post_action() {
        let html = r#"
            <html><body>
            <form action="/login" method="POST">
                <input type="text" name="username" />
                <input type="password" name="password" />
                <button type="submit">Log In</button>
            </form>
            </body></html>
        "#;

        let actions = discover_actions_from_html(html, "https://example.com");
        assert_eq!(actions.len(), 1);

        let ActionSource::Form {
            action_url, method, ..
        } = &actions[0].source
        else {
            panic!("expected ActionSource::Form");
        };
        assert_eq!(action_url, "https://example.com/login");
        assert_eq!(method, "POST");
    }

    // fetch() with an options object reports the explicit method.
    #[test]
    fn test_discover_js_fetch() {
        let js = r#"
            async function addItem(id) {
                const resp = await fetch('/api/cart', {method: 'POST', body: JSON.stringify({id})});
                return resp.json();
            }
        "#;

        let actions = discover_actions_from_js(js, "https://shop.example.com");
        assert!(!actions.is_empty());

        let cart_action = actions
            .iter()
            .find(|a| matches!(api_parts(a), Some((url, _)) if url.contains("/api/cart")))
            .expect("should find /api/cart action");

        let Some((_, method)) = api_parts(cart_action) else {
            panic!("expected ActionSource::Api");
        };
        assert_eq!(method, "POST");
    }

    // axios verb helpers are mapped to the matching HTTP method.
    #[test]
    fn test_discover_js_axios() {
        let js = r#"
            axios.post('/api/items', { name: 'widget' });
            axios.get('/api/items/123');
        "#;

        let actions = discover_actions_from_js(js, "https://example.com");
        assert!(actions.len() >= 2);

        let post_action = actions
            .iter()
            .find(|a| {
                matches!(
                    api_parts(a),
                    Some((url, method))
                        if url.contains("/api/items") && !url.contains("/123") && method == "POST"
                )
            })
            .expect("should find POST /api/items");
        assert!(
            matches!(&post_action.source, ActionSource::Api { method, .. } if method == "POST")
        );

        let get_action = actions
            .iter()
            .find(|a| {
                matches!(
                    api_parts(a),
                    Some((url, method)) if url.contains("/api/items/123") && method == "GET"
                )
            })
            .expect("should find GET /api/items/123");
        assert!(matches!(&get_action.source, ActionSource::Api { method, .. } if method == "GET"));
    }

    #[test]
    fn test_detect_shopify() {
        let html = r#"
            <html>
            <head>
                <script src="https://cdn.shopify.com/s/files/1/shop.js"></script>
            </head>
            <body>
                <div id="product">Widget</div>
            </body>
            </html>
        "#;

        assert_eq!(detect_platform(html), DetectedPlatform::Shopify);
    }

    #[test]
    fn test_detect_woocommerce() {
        let html = r#"
            <html>
            <body class="woocommerce woocommerce-page">
                <div class="product">
                    <a href="/wp-content/plugins/woocommerce/assets/style.css"></a>
                </div>
            </body>
            </html>
        "#;

        assert_eq!(detect_platform(html), DetectedPlatform::WooCommerce);
    }

    #[test]
    fn test_detect_no_platform() {
        let html = r#"
            <html>
            <body>
                <h1>My Personal Blog</h1>
                <p>Nothing to see here.</p>
            </body>
            </html>
        "#;

        assert_eq!(detect_platform(html), DetectedPlatform::Unknown);
    }

    // A detected Shopify page exposes the templated add-to-cart action.
    #[test]
    fn test_discover_platform_actions_shopify() {
        let html = r#"
            <html>
            <head>
                <script>
                    Shopify.shop = "mystore.myshopify.com";
                </script>
            </head>
            <body>
                <div class="product">Widget</div>
            </body>
            </html>
        "#;

        let actions = discover_actions_from_platform("mystore.myshopify.com", html);
        assert!(!actions.is_empty());

        let add_to_cart = actions
            .iter()
            .find(|a| {
                matches!(
                    &a.source,
                    ActionSource::Platform { action_type, .. } if action_type == "add_to_cart"
                )
            })
            .expect("should find add_to_cart action");

        assert_eq!(add_to_cart.opcode, OpCode::new(0x02, 0x00));
        assert!(add_to_cart.confidence > 0.0);
    }

    #[test]
    fn test_empty_html() {
        let actions = discover_actions_from_html("", "https://example.com");
        assert!(actions.is_empty());
    }

    // Hidden anti-forgery inputs are captured together with their values.
    #[test]
    fn test_form_csrf_token() {
        let html = r#"
            <html><body>
            <form action="/transfer" method="POST">
                <input type="hidden" name="_csrf" value="abc123def456" />
                <input type="hidden" name="_token" value="xyz789" />
                <input type="number" name="amount" required />
                <button type="submit">Submit</button>
            </form>
            </body></html>
        "#;

        let actions = discover_actions_from_html(html, "https://bank.example.com");
        assert_eq!(actions.len(), 1);

        let ActionSource::Form { fields, .. } = &actions[0].source else {
            panic!("expected ActionSource::Form");
        };

        let csrf = field_named(fields, "_csrf").expect("should find _csrf field");
        assert_eq!(csrf.field_type, "hidden");
        assert_eq!(csrf.value.as_deref(), Some("abc123def456"));

        let token = field_named(fields, "_token").expect("should find _token field");
        assert_eq!(token.field_type, "hidden");
        assert_eq!(token.value.as_deref(), Some("xyz789"));
    }
}