1pub fn split_tool_name(tool_name: &str) -> (String, String) {
13 match tool_name.split_once(crate::core::manifest::TOOL_SEP) {
14 Some((p, op)) if !p.is_empty() && !op.is_empty() => (p.to_string(), op.to_string()),
15 Some((p, _)) if !p.is_empty() => (p.to_string(), "unknown".to_string()),
18 _ => (tool_name.to_string(), "unknown".to_string()),
19 }
20}
21
22pub fn scrub_and_truncate(s: &str, max_len: usize) -> String {
26 let scrubbed = scrub(s);
27 if scrubbed.chars().count() <= max_len {
28 scrubbed
29 } else {
30 let mut out: String = scrubbed.chars().take(max_len.saturating_sub(1)).collect();
31 out.push('…');
32 out
33 }
34}
35
36fn scrub(s: &str) -> String {
37 let bytes = s.as_bytes();
38 let mut out = String::with_capacity(s.len());
39 let mut i = 0;
40 while i < bytes.len() {
41 if let Some(end) = match_uuid(bytes, i) {
47 out.push_str("***");
48 i = end;
49 } else if let Some(end) = match_email(bytes, i) {
50 out.push_str("***");
51 i = end;
52 } else if let Some(end) = match_ipv4(bytes, i) {
53 out.push_str("***");
54 i = end;
55 } else if let Some(end) = match_long_hex(bytes, i) {
56 out.push_str("***");
57 i = end;
58 } else {
59 let ch_len = utf8_char_len(bytes[i]);
62 let end = (i + ch_len).min(bytes.len());
63 out.push_str(std::str::from_utf8(&bytes[i..end]).unwrap_or(""));
66 i = end;
67 }
68 }
69 out
70}
71
/// Byte length of a UTF-8 sequence as indicated by its lead byte.
///
/// The length is the number of leading one-bits in `lead`: none means a
/// one-byte (ASCII) character, two/three mean a two/three-byte sequence, and
/// four or more are treated as four bytes. A continuation byte (exactly one
/// leading one-bit) is reported as length 1 so a scanner always advances.
fn utf8_char_len(lead: u8) -> usize {
    match lead.leading_ones() {
        0 | 1 => 1,
        2 => 2,
        3 => 3,
        _ => 4,
    }
}
84
/// Returns `true` when `b` is an ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`).
fn is_hex(b: u8) -> bool {
    matches!(b, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F')
}
88
89fn match_uuid(b: &[u8], start: usize) -> Option<usize> {
91 let spans = [8usize, 4, 4, 4, 12];
92 let mut i = start;
93 for (idx, span) in spans.iter().enumerate() {
94 if i + span > b.len() {
95 return None;
96 }
97 for k in 0..*span {
98 if !is_hex(b[i + k]) {
99 return None;
100 }
101 }
102 i += span;
103 if idx < spans.len() - 1 {
104 if i >= b.len() || b[i] != b'-' {
105 return None;
106 }
107 i += 1;
108 }
109 }
110 Some(i)
111}
112
113fn match_long_hex(b: &[u8], start: usize) -> Option<usize> {
117 if start > 0 && is_hex(b[start - 1]) {
119 return None;
120 }
121 let mut i = start;
122 let mut has_digit = false;
123 let mut has_alpha = false;
124 while i < b.len() && is_hex(b[i]) {
125 if b[i].is_ascii_digit() {
126 has_digit = true;
127 } else {
128 has_alpha = true;
129 }
130 i += 1;
131 }
132 if i - start >= 24 && has_digit && has_alpha {
133 Some(i)
134 } else {
135 None
136 }
137}
138
139fn match_email(b: &[u8], start: usize) -> Option<usize> {
140 let mut i = start;
141 let local_start = i;
142 while i < b.len() && is_email_local(b[i]) {
143 i += 1;
144 }
145 if i == local_start || i >= b.len() || b[i] != b'@' {
146 return None;
147 }
148 i += 1; let domain_start = i;
150 while i < b.len() && is_email_domain(b[i]) {
151 i += 1;
152 }
153 if i == domain_start {
154 return None;
155 }
156 if !b[domain_start..i].contains(&b'.') {
158 return None;
159 }
160 Some(i)
161}
162
/// Returns `true` for bytes accepted in the local part of an e-mail address:
/// ASCII letters and digits plus `.`, `_`, `-` and `+`.
fn is_email_local(b: u8) -> bool {
    match b {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
        b'.' | b'_' | b'-' | b'+' => true,
        _ => false,
    }
}
166
/// Returns `true` for bytes accepted in the domain part of an e-mail address:
/// ASCII letters and digits plus `.` and `-`.
fn is_email_domain(b: u8) -> bool {
    match b {
        b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' => true,
        b'.' | b'-' => true,
        _ => false,
    }
}
170
/// Tries to match a dotted-quad IPv4 address starting at `start`.
///
/// Each of the four octets must be 1–3 decimal digits with a value of at most
/// 255. To avoid masking parts of longer dotted numbers (version strings such
/// as `1.2.3.4.5`), the match is rejected when:
///
/// * the byte before `start` is a digit or a `.`, or
/// * the address is followed by a `.` and another digit.
///
/// A trailing `.` that is *not* followed by a digit is ordinary punctuation
/// (e.g. the end of a sentence) and does not prevent the match; the dot is
/// left outside the matched span.
///
/// Returns the index just past the last octet on success.
fn match_ipv4(b: &[u8], start: usize) -> Option<usize> {
    // Left boundary: never start in the middle of a dotted number.
    if start > 0 {
        let prev = b[start - 1];
        if prev.is_ascii_digit() || prev == b'.' {
            return None;
        }
    }

    let mut i = start;
    for octet in 0..4 {
        // Look at up to four bytes so a fourth digit can be detected and rejected.
        let digits = b
            .get(i..)?
            .iter()
            .take(4)
            .take_while(|c| c.is_ascii_digit())
            .count();
        if digits == 0 || digits > 3 {
            return None;
        }

        // At most three digits, so the value (<= 999) always fits in a u16.
        let value = b[i..i + digits]
            .iter()
            .fold(0u16, |acc, &d| acc * 10 + u16::from(d - b'0'));
        if value > 255 {
            return None;
        }
        i += digits;

        if octet < 3 {
            if b.get(i) != Some(&b'.') {
                return None;
            }
            i += 1;
        }
    }

    // Right boundary: the octet scan above already guarantees the next byte is
    // not a digit, so only a continuing ".<digit>" needs to be rejected.
    let continues_as_dotted_number =
        b.get(i) == Some(&b'.') && b.get(i + 1).map_or(false, |c| c.is_ascii_digit());
    if continues_as_dotted_number {
        None
    } else {
        Some(i)
    }
}
208
209pub fn parse_upstream_error(body: &str) -> (Option<String>, Option<String>) {
217 let trimmed = body.trim_start();
221 if !trimmed.starts_with('{') && !trimmed.starts_with('[') {
222 return (None, None);
223 }
224 let v: serde_json::Value = match serde_json::from_str(body) {
225 Ok(v) => v,
226 Err(_) => return (None, None),
227 };
228
229 let (error_type, error_message) = match v {
230 serde_json::Value::Object(ref map) => {
231 let err_field = map.get("error");
232 let error_type = err_field
233 .and_then(|e| e.get("type"))
234 .and_then(|t| t.as_str())
235 .map(str::to_string)
236 .or_else(|| map.get("type").and_then(|t| t.as_str()).map(str::to_string))
237 .or_else(|| {
238 map.get("error_type")
239 .and_then(|t| t.as_str())
240 .map(str::to_string)
241 });
242
243 let error_message = err_field
244 .and_then(|e| e.get("message"))
245 .and_then(|m| m.as_str())
246 .map(str::to_string)
247 .or_else(|| {
248 err_field.and_then(|e| e.as_str()).map(str::to_string)
250 })
251 .or_else(|| {
252 map.get("message")
253 .and_then(|m| m.as_str())
254 .map(str::to_string)
255 });
256
257 (error_type, error_message)
258 }
259 _ => (None, None),
260 };
261
262 (error_type, error_message)
263}
264
/// Decides whether an upstream error merely reports an empty result set.
///
/// Returns `true` when the trimmed, lower-cased message
///
/// * starts with `no <subject> found` or `no <subject> were found`, where
///   `<subject>` is one of `records`, `companies`, `persons`, `results`,
///   `matches`; or
/// * is empty or exactly `not found` **and** `error_type` is `not_found`.
///
/// The leading `no ` is required: a positive statement such as
/// `Records found: 25` is not an empty-result message. A missing message
/// never matches, regardless of `error_type`.
pub fn is_no_records_body(error_type: Option<&str>, error_message: Option<&str>) -> bool {
    const SUBJECTS: [&str; 5] = ["records", "companies", "persons", "results", "matches"];

    let message = match error_message {
        Some(raw) => raw.trim().to_ascii_lowercase(),
        None => return false,
    };

    let announces_empty_result = message.strip_prefix("no ").map_or(false, |rest| {
        SUBJECTS.iter().any(|&subject| {
            rest.strip_prefix(subject).map_or(false, |tail| {
                tail.starts_with(" were found") || tail.starts_with(" found")
            })
        })
    });
    if announces_empty_result {
        return true;
    }

    // The type alone is not enough: only a generic or empty message qualifies,
    // so specific "not found" errors about other things are still reported.
    error_type == Some("not_found") && (message.is_empty() || message == "not found")
}
307
308pub fn report_upstream_error(
318 provider: &str,
319 operation_id: &str,
320 upstream_status: u16,
321 proxy_status: u16,
322 error_type: Option<&str>,
323 error_message: Option<&str>,
324) {
325 let msg_short = error_message
326 .map(|m| scrub_and_truncate(m, 140))
327 .unwrap_or_default();
328
329 with_upstream_scope(
335 provider,
336 operation_id,
337 upstream_status,
338 proxy_status,
339 error_type,
340 &msg_short,
341 || match upstream_status {
342 402 | 403 | 422 => {
343 tracing::warn!(
344 provider,
345 operation_id,
346 upstream_status,
347 proxy_status,
348 error_type = error_type.unwrap_or(""),
349 msg = %msg_short,
350 "upstream client error"
351 );
352 #[cfg(feature = "sentry")]
356 sentry::capture_message(
357 &format!("upstream client error ({upstream_status}) {provider}:{operation_id}"),
358 sentry::Level::Warning,
359 );
360 }
361 _ => tracing::error!(
362 provider,
363 operation_id,
364 upstream_status,
365 proxy_status,
366 error_type = error_type.unwrap_or(""),
367 msg = %msg_short,
368 "upstream server error"
369 ),
370 },
371 );
372}
373
/// Runs `body` inside a Sentry scope carrying the upstream-error context
/// (variant compiled when the `sentry` feature is enabled).
///
/// The scope gets `provider`, `operation_id`, `upstream_status` and
/// `proxy_status` tags, an `upstream_error_type` tag when a type is known, the
/// message as the `upstream_error_message` extra when it is non-empty, and a
/// fingerprint made of a fixed prefix, the provider, the operation and the
/// upstream status.
#[cfg(feature = "sentry")]
fn with_upstream_scope<F: FnOnce()>(
    provider: &str,
    operation_id: &str,
    upstream_status: u16,
    proxy_status: u16,
    error_type: Option<&str>,
    msg_short: &str,
    body: F,
) {
    let upstream_code = upstream_status.to_string();
    let proxy_code = proxy_status.to_string();

    sentry::with_scope(
        |scope| {
            let tags = [
                ("provider", provider),
                ("operation_id", operation_id),
                ("upstream_status", upstream_code.as_str()),
                ("proxy_status", proxy_code.as_str()),
            ];
            for &(key, value) in &tags {
                scope.set_tag(key, value);
            }
            if let Some(kind) = error_type {
                scope.set_tag("upstream_error_type", kind);
            }

            if !msg_short.is_empty() {
                scope.set_extra(
                    "upstream_error_message",
                    serde_json::Value::String(msg_short.to_owned()),
                );
            }

            let fingerprint = [
                "ati.proxy.upstream_error",
                provider,
                operation_id,
                upstream_code.as_str(),
            ];
            scope.set_fingerprint(Some(fingerprint.as_slice()));
        },
        body,
    );
}
414
/// Variant compiled when the `sentry` feature is disabled: the context
/// arguments are ignored and `body` is simply invoked.
#[cfg(not(feature = "sentry"))]
fn with_upstream_scope<F: FnOnce()>(
    _provider: &str,
    _operation_id: &str,
    _upstream_status: u16,
    _proxy_status: u16,
    _error_type: Option<&str>,
    _msg_short: &str,
    body: F,
) {
    body()
}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// Owned `(provider, operation)` pair in the shape `split_tool_name` returns.
    fn pair(provider: &str, operation: &str) -> (String, String) {
        (provider.to_string(), operation.to_string())
    }

    // ---- split_tool_name -------------------------------------------------

    #[test]
    fn split_tool_name_ok() {
        let got = split_tool_name("finnhub:price_target");
        assert_eq!(got, pair("finnhub", "price_target"));
    }

    #[test]
    fn split_tool_name_missing_op() {
        let got = split_tool_name("bare_tool");
        assert_eq!(got, pair("bare_tool", "unknown"));
    }

    #[test]
    fn split_tool_name_empty_op() {
        let got = split_tool_name("provider:");
        assert_eq!(got, pair("provider", "unknown"));
    }

    // ---- parse_upstream_error --------------------------------------------

    #[test]
    fn parse_nested_pdl_body() {
        let body = r#"{"status":404,"error":{"type":"not_found","message":"No records were found matching your request"}}"#;
        let (error_type, message) = parse_upstream_error(body);
        assert_eq!(error_type, Some("not_found".to_string()));
        assert_eq!(
            message,
            Some("No records were found matching your request".to_string())
        );
    }

    #[test]
    fn parse_flat_xai_style_body() {
        let body = r#"{"error":"Insufficient credits","message":"Your current balance is $0.01"}"#;
        let (error_type, message) = parse_upstream_error(body);
        assert_eq!(error_type, None);
        assert_eq!(message, Some("Insufficient credits".to_string()));
    }

    #[test]
    fn parse_non_json_body() {
        assert_eq!(parse_upstream_error("not json at all"), (None, None));
    }

    // ---- is_no_records_body ----------------------------------------------

    #[test]
    fn no_records_type_alone_does_not_match() {
        let not_found = Some("not_found");
        for &message in &[None, Some("User account 42 was deleted")] {
            assert!(
                !is_no_records_body(not_found, message),
                "unexpected match for message {:?}",
                message
            );
        }
    }

    #[test]
    fn no_records_type_with_generic_not_found_message_matches() {
        let not_found = Some("not_found");
        for &message in &["not found", ""] {
            assert!(
                is_no_records_body(not_found, Some(message)),
                "expected a match for message {:?}",
                message
            );
        }
    }

    #[test]
    fn no_records_message_matches() {
        let messages = [
            "No records were found matching your request",
            "No companies were found matching your request",
            "no results found",
        ];
        for &message in &messages {
            assert!(
                is_no_records_body(None, Some(message)),
                "expected a match for message {:?}",
                message
            );
        }
    }

    #[test]
    fn no_records_rejects_real_errors() {
        let cases: [(Option<&str>, Option<&str>); 4] = [
            (Some("invalid_request"), None),
            (None, Some("Insufficient credits")),
            (None, Some("Forbidden")),
            (None, None),
        ];
        for &(error_type, message) in &cases {
            assert!(
                !is_no_records_body(error_type, message),
                "unexpected match for {:?} / {:?}",
                error_type,
                message
            );
        }
    }

    // ---- scrub -----------------------------------------------------------

    #[test]
    fn scrub_uuid() {
        let input = "request id 550e8400-e29b-41d4-a716-446655440000 failed";
        let expected = "request id *** failed";
        assert_eq!(scrub(input), expected);
    }

    #[test]
    fn scrub_email() {
        let cleaned = scrub("contact miguel@parcha.ai now");
        assert_eq!(cleaned, "contact *** now");
    }

    #[test]
    fn scrub_ipv4() {
        let cleaned = scrub("from 192.168.1.1 blocked");
        assert_eq!(cleaned, "from *** blocked");
    }

    #[test]
    fn scrub_ipv4_rejects_version_strings() {
        // Dotted numbers with more than four components must pass through untouched.
        for &text in &["library 1.2.3.4.5 raised an error", "version 10.11.12.13.0"] {
            assert_eq!(scrub(text), text);
        }
    }

    #[test]
    fn scrub_ipv4_rejects_out_of_range_octets() {
        let text = "bogus 999.999.999.999 ip";
        assert_eq!(scrub(text), text);
    }

    #[test]
    fn scrub_long_hex_token() {
        let tok = "abcdef0123456789abcdef0123456789abcdef01";
        let message = ["token ", tok, " bad"].concat();
        assert_eq!(scrub(&message), "token *** bad");
    }

    #[test]
    fn scrub_preserves_short_hex() {
        let text = "hex abc123 fine";
        assert_eq!(scrub(text), text);
    }

    #[test]
    fn scrub_preserves_multibyte_utf8() {
        let text = "café résumé 日本語";
        assert_eq!(scrub(text), text);
    }

    #[test]
    fn scrub_mixed_utf8_and_secrets() {
        let cleaned = scrub("café contact miguel@parcha.ai résumé");
        assert_eq!(cleaned, "café contact *** résumé");
    }

    // ---- parse_upstream_error early-outs ---------------------------------

    #[test]
    fn parse_non_json_html_body_early_outs() {
        let parsed = parse_upstream_error("<html><body>502 Bad Gateway</body></html>");
        assert_eq!(parsed, (None, None));
    }

    #[test]
    fn parse_empty_body_returns_none() {
        assert_eq!(parse_upstream_error(""), (None, None));
    }

    // ---- scrub_and_truncate ----------------------------------------------

    #[test]
    fn truncate_long_message() {
        let long: String = std::iter::repeat('a').take(500).collect();
        let shortened = scrub_and_truncate(&long, 20);
        assert_eq!(shortened.chars().count(), 20);
        assert!(shortened.ends_with('…'));
    }

    #[test]
    fn truncate_short_message_untouched() {
        let shortened = scrub_and_truncate("short", 100);
        assert_eq!(shortened, "short");
    }
}