Skip to main content

erroracle/
classifier.rs

1use crate::types::{
2    ErrorInsight, HttpResponse, InsightKind, PropertySuggestion, ResponseAnalysis,
3    ResponseDisposition, MAX_ANALYSIS_BODY_CHARS,
4};
5use regex_lite::Regex;
6
7pub fn classify_response(response: &HttpResponse) -> ResponseAnalysis {
8    let body = bounded_body(&response.body);
9    let lower = body.to_lowercase();
10    let insight = analyze_error(&body);
11    let disposition = classify_disposition(response.status, &lower, insight.as_ref());
12
13    ResponseAnalysis {
14        disposition,
15        fingerprint: fingerprint(response.status, &body),
16        insight,
17    }
18}
19
20pub fn analyze_error(error: &str) -> Option<ErrorInsight> {
21    let bounded = bounded_body(error);
22    let lower = bounded.to_lowercase();
23
24    detect_sql_syntax_issue(&lower, &bounded)
25        .or_else(|| detect_missing_property_issue(&lower, &bounded))
26        .or_else(|| detect_type_mismatch_issue(&bounded))
27        .or_else(|| detect_unknown_dispatch_issue(&lower))
28        .or_else(|| detect_auth_issue(&bounded))
29        .or_else(|| detect_parse_issue(&lower, &bounded))
30        .or_else(|| detect_misc_issue(&lower, &bounded))
31}
32
33fn bounded_body(body: &str) -> String {
34    body.chars().take(MAX_ANALYSIS_BODY_CHARS).collect()
35}
36
37fn classify_disposition(
38    status: u16,
39    lower: &str,
40    insight: Option<&ErrorInsight>,
41) -> ResponseDisposition {
42    if !is_valid_http_status(status) {
43        return ResponseDisposition::Unknown;
44    }
45
46    if is_waf_block(status, lower) {
47        return ResponseDisposition::WafBlock;
48    }
49
50    if is_rate_limited_response(status, lower) {
51        return ResponseDisposition::RateLimited;
52    }
53
54    if is_auth_required_response(status, lower) {
55        return ResponseDisposition::AuthRequired;
56    }
57
58    if is_json_parse_error(insight) {
59        return ResponseDisposition::JsonParseError;
60    }
61
62    if is_application_error(status, insight) {
63        return ResponseDisposition::ApplicationError;
64    }
65
66    if (200..400).contains(&status) && insight.is_none() {
67        return ResponseDisposition::FalsePositive;
68    }
69
70    ResponseDisposition::Unknown
71}
72
/// Returns `true` when `status` lies in the inclusive range 100-599.
fn is_valid_http_status(status: u16) -> bool {
    matches!(status, 100..=599)
}
76
/// Returns `true` for status 429 or when the (already lowercased) body
/// contains a rate-limiting phrase.
fn is_rate_limited_response(status: u16, lower: &str) -> bool {
    const PHRASES: [&str; 2] = ["too many requests", "rate limit"];

    status == 429 || PHRASES.iter().any(|phrase| lower.contains(*phrase))
}
80
/// Returns `true` for status 401/403 or when the (already lowercased) body
/// contains "unauthorized" or "forbidden".
fn is_auth_required_response(status: u16, lower: &str) -> bool {
    if matches!(status, 401 | 403) {
        return true;
    }
    ["unauthorized", "forbidden"]
        .iter()
        .any(|marker| lower.contains(*marker))
}
84
85fn is_json_parse_error(insight: Option<&ErrorInsight>) -> bool {
86    matches!(
87        insight.map(|item| &item.kind),
88        Some(InsightKind::JsonParseError)
89    )
90}
91
92fn is_application_error(status: u16, insight: Option<&ErrorInsight>) -> bool {
93    status >= 500
94        || matches!(
95            insight.map(|item| &item.kind),
96            Some(
97                InsightKind::SqlSyntaxError
98                    | InsightKind::MissingProperty(_)
99                    | InsightKind::TypeMismatch { .. }
100                    | InsightKind::UnknownDispatch { .. }
101                    | InsightKind::Other
102            )
103        )
104}
105
106fn detect_sql_syntax_issue(lower: &str, error: &str) -> Option<ErrorInsight> {
107    if (lower.contains("syntax error")
108        && (lower.contains("sql") || lower.contains("near") || lower.contains('\'')))
109        || lower.contains("unterminated string")
110        || lower.contains("unclosed quotation")
111        || lower.contains("sqlstate")
112    {
113        return Some(ErrorInsight {
114            kind: InsightKind::SqlSyntaxError,
115            error_text: error.to_string(),
116            suggestions: vec![],
117        });
118    }
119
120    None
121}
122
123fn detect_missing_property_issue(lower: &str, error: &str) -> Option<ErrorInsight> {
124    extract_missing_property(lower).map(|property| ErrorInsight {
125        kind: InsightKind::MissingProperty(property.clone()),
126        error_text: error.to_string(),
127        suggestions: vec![PropertySuggestion {
128            key: property.clone(),
129            value: "test".into(),
130            reason: format!("handler tried to access missing property `{property}`"),
131        }],
132    })
133}
134
135fn detect_type_mismatch_issue(error: &str) -> Option<ErrorInsight> {
136    if let Some((property, expected)) = extract_type_mismatch(error) {
137        return Some(ErrorInsight {
138            kind: InsightKind::TypeMismatch { property, expected },
139            error_text: error.to_string(),
140            suggestions: vec![],
141        });
142    }
143
144    let lower = error.to_lowercase();
145    if lower.contains("is not a function") {
146        static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
147        let expected =
148            if lower.contains("map") || lower.contains("foreach") || lower.contains("filter") {
149                "array"
150            } else if lower.contains("touppercase")
151                || lower.contains("tolowercase")
152                || lower.contains("trim")
153            {
154                "string"
155            } else if lower.contains("tofixed") || lower.contains("tostring") {
156                "number"
157            } else {
158                "object"
159            };
160        let re = RE.get_or_init(|| {
161            Regex::new(r"(\w+)\.\w+\s+is not a function").unwrap_or_else(|_| unreachable!())
162        });
163        let property = re
164            .captures(&lower)
165            .and_then(|captures| captures.get(1))
166            .map(|value| value.as_str().to_string());
167        return Some(ErrorInsight {
168            kind: InsightKind::TypeMismatch {
169                property,
170                expected: expected.to_string(),
171            },
172            error_text: error.to_string(),
173            suggestions: vec![],
174        });
175    }
176    None
177}
178
179fn detect_unknown_dispatch_issue(lower: &str) -> Option<ErrorInsight> {
180    extract_unknown_dispatch(lower).map(|(property, tried)| ErrorInsight {
181        kind: InsightKind::UnknownDispatch { property, tried },
182        error_text: lower.to_string(),
183        suggestions: vec![],
184    })
185}
186
187fn detect_auth_issue(error: &str) -> Option<ErrorInsight> {
188    let lower = error.to_lowercase();
189    if lower.contains("unauthorized")
190        || lower.contains("permission denied")
191        || lower.contains("access denied")
192        || lower.contains("forbidden")
193        || lower.contains("authentication required")
194    {
195        return Some(ErrorInsight {
196            kind: InsightKind::AuthRequired,
197            error_text: error.to_string(),
198            suggestions: vec![],
199        });
200    }
201    None
202}
203
204fn detect_parse_issue(lower: &str, error: &str) -> Option<ErrorInsight> {
205    if (lower.contains("unexpected token") && lower.contains("json"))
206        || lower.contains("invalid json")
207        || lower.contains("json.parse")
208    {
209        return Some(ErrorInsight {
210            kind: InsightKind::JsonParseError,
211            error_text: error.to_string(),
212            suggestions: vec![],
213        });
214    }
215
216    if lower.contains("rate limit") || lower.contains("too many requests") {
217        return Some(ErrorInsight {
218            kind: InsightKind::RateLimited,
219            error_text: error.to_string(),
220            suggestions: vec![],
221        });
222    }
223    None
224}
225
226fn detect_misc_issue(lower: &str, error: &str) -> Option<ErrorInsight> {
227    if lower.contains("no such file")
228        || lower.contains("enoent")
229        || lower.contains("file not found")
230        || lower.contains("path")
231    {
232        return Some(ErrorInsight {
233            kind: InsightKind::Other,
234            error_text: error.to_string(),
235            suggestions: vec![PropertySuggestion {
236                key: "path".into(),
237                value: "/etc/passwd".into(),
238                reason: "filesystem access is reachable; test traversal-style payloads".into(),
239            }],
240        });
241    }
242
243    if lower.contains("invalid regular expression")
244        || lower.contains("unterminated group")
245        || lower.contains("nothing to repeat")
246    {
247        return Some(ErrorInsight {
248            kind: InsightKind::Other,
249            error_text: error.to_string(),
250            suggestions: vec![PropertySuggestion {
251                key: "pattern".into(),
252                value: "(a+)+$".into(),
253                reason: "regex compilation was observed; test ReDoS-sensitive patterns".into(),
254            }],
255        });
256    }
257
258    if lower.contains("invalid url")
259        || lower.contains("failed to construct 'url'")
260        || lower.contains("malformed url")
261    {
262        return Some(ErrorInsight {
263            kind: InsightKind::Other,
264            error_text: error.to_string(),
265            suggestions: vec![PropertySuggestion {
266                key: "url".into(),
267                value: "https://127.0.0.1/admin".into(),
268                reason: "URL parsing was observed; SSRF-relevant follow-ups may apply".into(),
269            }],
270        });
271    }
272
273    if lower.contains("nan")
274        || lower.contains("not a number")
275        || lower.contains("parseint")
276        || lower.contains("parsefloat")
277    {
278        return Some(ErrorInsight {
279            kind: InsightKind::TypeMismatch {
280                property: None,
281                expected: "number".into(),
282            },
283            error_text: error.to_string(),
284            suggestions: vec![],
285        });
286    }
287
288    if error.len() > 10 && !lower.contains("undefined") {
289        return Some(ErrorInsight {
290            kind: InsightKind::Other,
291            error_text: error.to_string(),
292            suggestions: vec![],
293        });
294    }
295
296    None
297}
298
299fn extract_type_mismatch(error: &str) -> Option<(Option<String>, String)> {
300    static RE1: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
301    static RE2: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
302    static RE3: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
303
304    let lower = error.to_lowercase();
305
306    let re1 = RE1.get_or_init(|| {
307        Regex::new(r"([a-z_]+)\s+must\s+be\s+(?:a\s+)?([a-z]+)\b(?:\s+or\s+([a-z]+))?")
308            .unwrap_or_else(|_| unreachable!())
309    });
310    if let Some(captures) = re1.captures(&lower) {
311        let property = captures.get(1).map(|item| item.as_str().to_string());
312        let expected = captures.get(2).or_else(|| captures.get(3))?;
313        return Some((property, expected.as_str().to_string()));
314    }
315
316    let re2 = RE2.get_or_init(|| {
317        Regex::new(
318            r"typeerror:\s*([a-z_]+)\s*:\s*expected\s+(?:a\s+)?([a-z]+),?\s*got\s+(?:a\s+)?([a-z]+)",
319        )
320        .unwrap_or_else(|_| unreachable!())
321    });
322    if let Some(captures) = re2.captures(&lower) {
323        let property = Some(captures.get(1)?.as_str().to_string());
324        let expected = captures.get(2)?.as_str().to_string();
325        return Some((property, expected));
326    }
327
328    let re3 = RE3.get_or_init(|| {
329        Regex::new(r"expected\s+(?:a\s+)?([a-z]+),\s*got\s+(?:a\s+)?([a-z]+)")
330            .unwrap_or_else(|_| unreachable!())
331    });
332    if let Some(captures) = re3.captures(&lower) {
333        let expected = captures.get(1)?.as_str().to_string();
334        return Some((None, expected));
335    }
336
337    None
338}
339
340fn extract_missing_property(error: &str) -> Option<String> {
341    static PATTERNS: std::sync::OnceLock<Vec<Regex>> = std::sync::OnceLock::new();
342    let patterns = PATTERNS.get_or_init(|| {
343        [
344            r"cannot read propert(?:y|ies) (?:of )?'(\w+)'",
345            r#"missing (?:required )?(?:property|field)[:\s]+['"]?(\w+)['"]?"#,
346            r"reading '(\w+)'",
347            r"(\w+) is not defined",
348        ]
349        .into_iter()
350        .filter_map(|pattern| Regex::new(pattern).ok())
351        .collect()
352    });
353
354    for re in patterns {
355        if let Some(captures) = re.captures(error) {
356            let property = captures.get(1)?.as_str();
357            if ![
358                "require", "module", "exports", "window", "document", "process",
359            ]
360            .contains(&property)
361            {
362                return Some(property.to_string());
363            }
364        }
365    }
366    None
367}
368
369fn extract_unknown_dispatch(error: &str) -> Option<(String, String)> {
370    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
371    let re = RE.get_or_init(|| {
372        Regex::new(r#"(?:unknown|invalid|unrecognized|unsupported)\s+(\w+):\s*['"]?(\w+)"#)
373            .unwrap_or_else(|_| unreachable!())
374    });
375    let captures = re.captures(error)?;
376    Some((
377        captures.get(1)?.as_str().to_string(),
378        captures.get(2)?.as_str().to_string(),
379    ))
380}
381
/// Returns `true` when the status is 403, 406 or 429 and the (already
/// lowercased) body contains at least one blocking signal.
fn is_waf_block(status: u16, lower: &str) -> bool {
    const SIGNALS: [&str; 8] = [
        "access denied",
        "request blocked",
        "forbidden",
        "cloudflare",
        "akamai",
        "imperva",
        "bot protection",
        "waf",
    ];

    if !matches!(status, 403 | 406 | 429) {
        return false;
    }
    SIGNALS.iter().any(|signal| lower.contains(*signal))
}
393
/// Builds a compact, lowercase fingerprint of a response.
///
/// The result is `"<status>:<summary>"`, where the summary is the first 12
/// whitespace-separated tokens of `body`, each cut to 32 characters and
/// joined by single spaces.
fn fingerprint(status: u16, body: &str) -> String {
    const MAX_TOKENS: usize = 12;
    const MAX_TOKEN_CHARS: usize = 32;

    let mut summary = String::new();
    for (index, word) in body.split_whitespace().take(MAX_TOKENS).enumerate() {
        if index > 0 {
            summary.push(' ');
        }
        summary.extend(word.chars().take(MAX_TOKEN_CHARS));
    }

    // Lowercase the joined text as a whole, after truncation.
    let summary = summary.to_lowercase();
    format!("{status}:{summary}")
}
403
#[cfg(test)]
mod tests {
    use super::*;

    /// Classifies a synthetic response and returns only its disposition.
    fn disposition_of(status: u16, body: &str) -> ResponseDisposition {
        classify_response(&HttpResponse::new(status, body)).disposition
    }

    /// Analyzes `message`, panicking when no insight is produced.
    fn insight_for(message: &str) -> ErrorInsight {
        analyze_error(message).unwrap()
    }

    #[test]
    fn classify_response_detects_waf_block() {
        assert_eq!(
            disposition_of(403, "Access denied by Cloudflare WAF"),
            ResponseDisposition::WafBlock
        );
    }

    #[test]
    fn classify_response_detects_rate_limit() {
        assert_eq!(
            disposition_of(429, "Too many requests"),
            ResponseDisposition::RateLimited
        );
    }

    #[test]
    fn classify_response_detects_auth_required() {
        assert_eq!(
            disposition_of(401, "Unauthorized"),
            ResponseDisposition::AuthRequired
        );
    }

    #[test]
    fn classify_response_detects_json_parse_error() {
        assert_eq!(
            disposition_of(400, "Unexpected token in JSON input"),
            ResponseDisposition::JsonParseError
        );
    }

    #[test]
    fn classify_response_detects_application_error() {
        assert_eq!(
            disposition_of(500, "SQLSTATE syntax error near foo"),
            ResponseDisposition::ApplicationError
        );
    }

    #[test]
    fn classify_response_detects_false_positive() {
        assert_eq!(
            disposition_of(200, "all clear"),
            ResponseDisposition::FalsePositive
        );
    }

    #[test]
    fn classify_response_detects_unknown_for_invalid_status() {
        assert_eq!(disposition_of(99, "whatever"), ResponseDisposition::Unknown);
    }

    #[test]
    fn analyze_error_detects_sql_syntax_issue() {
        let found = insight_for("SQLSTATE syntax error near 'select'");
        assert_eq!(found.kind, InsightKind::SqlSyntaxError);
    }

    #[test]
    fn analyze_error_detects_missing_property_issue() {
        let found = insight_for("missing property: username");
        let expected_kind = InsightKind::MissingProperty("username".to_string());
        assert_eq!(found.kind, expected_kind);
        assert_eq!(found.suggestions[0].key, "username");
    }

    #[test]
    fn analyze_error_detects_type_mismatch_issue() {
        let found = insight_for("TypeError: age: expected number, got string");
        let expected_kind = InsightKind::TypeMismatch {
            property: Some("age".to_string()),
            expected: "number".to_string(),
        };
        assert_eq!(found.kind, expected_kind);
    }

    #[test]
    fn analyze_error_detects_unknown_dispatch_issue() {
        let found = insight_for("unsupported action: deleteAll");
        // The dispatch detector works on lowercased text, hence "deleteall".
        let expected_kind = InsightKind::UnknownDispatch {
            property: "action".to_string(),
            tried: "deleteall".to_string(),
        };
        assert_eq!(found.kind, expected_kind);
    }

    #[test]
    fn analyze_error_detects_auth_issue() {
        let found = insight_for("permission denied");
        assert_eq!(found.kind, InsightKind::AuthRequired);
    }

    #[test]
    fn analyze_error_detects_parse_issue() {
        let found = insight_for("invalid json payload");
        assert_eq!(found.kind, InsightKind::JsonParseError);
    }

    #[test]
    fn analyze_error_detects_rate_limit_issue() {
        let found = insight_for("rate limit exceeded");
        assert_eq!(found.kind, InsightKind::RateLimited);
    }

    #[test]
    fn analyze_error_detects_misc_url_issue() {
        let found = insight_for("invalid URL passed to fetch");
        assert_eq!(found.kind, InsightKind::Other);
        assert_eq!(found.suggestions[0].key, "url");
    }

    #[test]
    fn extract_missing_property_ignores_common_globals() {
        let extracted = extract_missing_property("window is not defined");
        assert_eq!(extracted, None);
    }

    #[test]
    fn extract_unknown_dispatch_parses_property_and_value() {
        let expected = Some(("action".to_string(), "login".to_string()));
        assert_eq!(extract_unknown_dispatch("invalid action: login"), expected);
    }

    #[test]
    fn is_waf_block_requires_blocking_signals() {
        assert!(is_waf_block(403, "akamai request blocked"));
        assert!(!is_waf_block(403, "generic authorization failure"));
    }

    #[test]
    fn fingerprint_normalizes_whitespace_and_case() {
        let actual = fingerprint(500, "Error   HERE");
        assert_eq!(actual, "500:error here");
    }
}
539}