1use devboy_core::FollowUpLink;
23use serde_json::{Map, Value};
24
/// Upper bound on how many argument objects a single follow-up link may yield.
///
/// The Glob, Grep and generic extractors in this file stop collecting once
/// they have produced this many projections.
pub const MAX_PROJECTIONS_PER_LINK: usize = 3;
30
31pub fn extract_args(prev_tool: &str, prev_result: &Value, link: &FollowUpLink) -> Vec<Value> {
45 if let Some(args) = builtin_extract(prev_tool, prev_result, link) {
46 return args;
47 }
48 generic_extract(prev_result, link)
49}
50
51fn builtin_extract(
56 prev_tool: &str,
57 prev_result: &Value,
58 link: &FollowUpLink,
59) -> Option<Vec<Value>> {
60 match (prev_tool, link.tool.as_str()) {
61 ("Glob", "Read") | ("Glob", "Grep") => Some(extract_glob_paths(
62 prev_result,
63 link.projection_arg.as_deref().unwrap_or("file_path"),
64 )),
65 ("Grep", "Read") | ("Grep", "Edit") => Some(extract_grep_paths(
66 prev_result,
67 link.projection_arg.as_deref().unwrap_or("file_path"),
68 )),
69 ("WebSearch", "WebFetch") => Some(extract_websearch_urls(
70 prev_result,
71 link.projection_arg.as_deref().unwrap_or("url"),
72 )),
73 _ => None,
74 }
75}
76
77fn extract_glob_paths(prev_result: &Value, arg_name: &str) -> Vec<Value> {
82 let paths = if let Some(arr) = prev_result.as_array() {
83 arr.iter()
84 .filter_map(|v| {
85 v.as_str()
86 .map(String::from)
87 .or_else(|| string_field(v, "path"))
88 .or_else(|| string_field(v, "match_path"))
89 })
90 .collect::<Vec<_>>()
91 } else if let Some(s) = prev_result.as_str() {
92 s.lines()
93 .map(str::trim)
94 .filter(|l| !l.is_empty())
95 .map(String::from)
96 .collect()
97 } else {
98 Vec::new()
99 };
100
101 paths
102 .into_iter()
103 .take(MAX_PROJECTIONS_PER_LINK)
104 .map(|p| single_arg(arg_name, Value::String(p)))
105 .collect()
106}
107
108fn extract_grep_paths(prev_result: &Value, arg_name: &str) -> Vec<Value> {
113 let body = match prev_result {
114 Value::String(s) => s.clone(),
115 Value::Array(arr) => {
118 let mut seen: Vec<String> = Vec::new();
119 for v in arr {
120 if let Some(p) = string_field(v, "path").or_else(|| string_field(v, "file"))
121 && !seen.contains(&p)
122 {
123 seen.push(p);
124 }
125 if seen.len() >= MAX_PROJECTIONS_PER_LINK {
126 break;
127 }
128 }
129 return seen
130 .into_iter()
131 .map(|p| single_arg(arg_name, Value::String(p)))
132 .collect();
133 }
134 _ => return Vec::new(),
135 };
136
137 let mut seen: Vec<String> = Vec::new();
138 for line in body.lines() {
139 let trimmed = line.trim();
140 if trimmed.is_empty() {
141 continue;
142 }
143 let path = trimmed.split(':').next().unwrap_or("").trim().to_string();
146 if path.is_empty() || seen.contains(&path) {
147 continue;
148 }
149 seen.push(path);
150 if seen.len() >= MAX_PROJECTIONS_PER_LINK {
151 break;
152 }
153 }
154 seen.into_iter()
155 .map(|p| single_arg(arg_name, Value::String(p)))
156 .collect()
157}
158
159fn extract_websearch_urls(prev_result: &Value, arg_name: &str) -> Vec<Value> {
164 let arr = prev_result
165 .get("results")
166 .and_then(Value::as_array)
167 .or_else(|| prev_result.as_array());
168 let Some(arr) = arr else {
169 return Vec::new();
170 };
171 arr.iter()
172 .filter_map(|v| string_field(v, "url"))
173 .take(1)
174 .map(|u| single_arg(arg_name, Value::String(u)))
175 .collect()
176}
177
178fn generic_extract(prev_result: &Value, link: &FollowUpLink) -> Vec<Value> {
183 let Some(field) = link.projection.as_deref() else {
184 return Vec::new();
185 };
186 let Some(arg_name) = link.projection_arg.as_deref() else {
187 return Vec::new();
188 };
189
190 let mut out: Vec<Value> = Vec::new();
191 walk(prev_result, field, &mut |v| {
192 out.push(single_arg(arg_name, v.clone()));
193 out.len() < MAX_PROJECTIONS_PER_LINK
194 });
195 out
196}
197
198fn walk(v: &Value, field: &str, visit: &mut impl FnMut(&Value) -> bool) -> bool {
202 match v {
203 Value::Object(map) => {
204 for (k, val) in map {
205 if k == field {
206 let cont = visit(val);
207 if !cont {
208 return false;
209 }
210 }
211 if !walk(val, field, visit) {
212 return false;
213 }
214 }
215 true
216 }
217 Value::Array(arr) => {
218 for item in arr {
219 if !walk(item, field, visit) {
220 return false;
221 }
222 }
223 true
224 }
225 _ => true,
226 }
227}
228
229fn string_field(v: &Value, name: &str) -> Option<String> {
230 v.get(name).and_then(Value::as_str).map(String::from)
231}
232
/// Extracts the lowercased host from an absolute URL of the form
/// `scheme://[userinfo@]host[:port][/path][?query][#fragment]`.
///
/// * The scheme, userinfo, port, path, query and fragment are discarded.
/// * IPv6 literals keep their square brackets (`[::1]`).
/// * The host is lowercased with ASCII rules only.
///
/// Returns `None` when the input has no `://` separator, when the text
/// before the separator is not a syntactically valid scheme, or when the
/// host part is empty (including an empty bracket literal `[]`).
pub fn extract_host(url: &str) -> Option<String> {
    let (scheme, after_scheme) = url.split_once("://")?;

    // RFC 3986 §3.1: scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ).
    // Without this check a relative reference such as
    // `/path?next=https://other.example` would be mistaken for an absolute
    // URL because of the `://` embedded in its query string.
    let mut scheme_chars = scheme.chars();
    let starts_with_letter = matches!(scheme_chars.next(), Some(c) if c.is_ascii_alphabetic());
    let rest_is_valid =
        scheme_chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'));
    if !starts_with_letter || !rest_is_valid {
        return None;
    }

    // The authority ends at the first path, query or fragment delimiter.
    let authority = after_scheme.split(['/', '?', '#']).next()?;
    if authority.is_empty() {
        return None;
    }

    // Drop `user:pass@`; the last `@` wins because userinfo may contain one.
    let host_with_port = authority
        .rsplit_once('@')
        .map_or(authority, |(_, rest)| rest);

    let host = if let Some(stripped) = host_with_port.strip_prefix('[') {
        // IPv6 literal: the colons inside the brackets are not port
        // separators, so cut at the closing bracket instead.
        let close = stripped.find(']')?;
        let inside = &stripped[..close];
        if inside.is_empty() {
            return None;
        }
        format!("[{inside}]")
    } else {
        host_with_port
            .rsplit_once(':')
            .map_or(host_with_port, |(h, _)| h)
            .to_string()
    };

    if host.is_empty() {
        return None;
    }
    Some(host.to_ascii_lowercase())
}
280
281fn single_arg(name: &str, value: Value) -> Value {
282 let mut m = Map::new();
283 m.insert(name.to_string(), value);
284 Value::Object(m)
285}
286
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds a link to `tool` with probability 1.0 and both projection
    /// settings populated.
    fn link(tool: &str, projection: &str, arg: &str) -> FollowUpLink {
        FollowUpLink {
            projection: Some(projection.into()),
            projection_arg: Some(arg.into()),
            probability: 1.0,
            tool: tool.into(),
        }
    }

    /// Reads the string stored under `key` in every produced argument object.
    /// Panics when an object lacks the key or holds a non-string there.
    fn projected<'a>(args: &'a [Value], key: &str) -> Vec<&'a str> {
        args.iter().map(|arg| arg[key].as_str().unwrap()).collect()
    }

    #[test]
    fn glob_to_read_extracts_paths_from_array_of_strings() {
        let listing = json!(["src/main.rs", "src/lib.rs", "src/api.rs", "src/db.rs"]);
        let args = extract_args("Glob", &listing, &link("Read", "match_path", "file_path"));
        // Four paths go in; the output is capped at three.
        assert_eq!(
            projected(&args, "file_path"),
            ["src/main.rs", "src/lib.rs", "src/api.rs"]
        );
    }

    #[test]
    fn glob_to_read_extracts_paths_from_array_of_objects() {
        let listing = json!([
            {"path": "a.rs", "size": 100},
            {"path": "b.rs", "size": 200},
        ]);
        let args = extract_args("Glob", &listing, &link("Read", "path", "file_path"));
        assert_eq!(projected(&args, "file_path"), ["a.rs", "b.rs"]);
    }

    #[test]
    fn glob_to_read_extracts_paths_from_text_body() {
        // Blank lines and surrounding whitespace must be ignored.
        let listing = Value::from("src/main.rs\n\nsrc/lib.rs\n src/api.rs \n");
        let args = extract_args("Glob", &listing, &link("Read", "match_path", "file_path"));
        assert_eq!(
            projected(&args, "file_path"),
            ["src/main.rs", "src/lib.rs", "src/api.rs"]
        );
    }

    #[test]
    fn grep_to_read_dedups_by_path() {
        // `src/main.rs` matches twice but must be projected once.
        let matches = Value::from(concat!(
            "src/main.rs:10:fn foo() {}\n",
            "src/main.rs:42:fn bar() {}\n",
            "src/lib.rs:5:fn baz() {}\n",
            "src/db.rs:1:use std;\n",
        ));
        let args = extract_args("Grep", &matches, &link("Read", "path", "file_path"));
        assert_eq!(
            projected(&args, "file_path"),
            ["src/main.rs", "src/lib.rs", "src/db.rs"]
        );
    }

    #[test]
    fn grep_to_read_handles_array_of_objects() {
        let matches = json!([
            {"path": "a.rs", "line": 1},
            {"path": "a.rs", "line": 2},
            {"path": "b.rs", "line": 1},
        ]);
        let args = extract_args("Grep", &matches, &link("Read", "path", "file_path"));
        assert_eq!(projected(&args, "file_path"), ["a.rs", "b.rs"]);
    }

    #[test]
    fn websearch_to_webfetch_takes_top_url_only() {
        let search = json!({
            "results": [
                {"title": "First", "url": "https://example.com/a", "snippet": "…"},
                {"title": "Second", "url": "https://example.com/b", "snippet": "…"},
            ]
        });
        let args = extract_args("WebSearch", &search, &link("WebFetch", "url", "url"));
        // Only the first hit is followed.
        assert_eq!(projected(&args, "url"), ["https://example.com/a"]);
    }

    #[test]
    fn generic_fallback_walks_nested_objects() {
        let payload = json!({
            "outer": {
                "inner": [
                    {"id": 1, "deep": {"target_field": "value-1"}},
                    {"id": 2, "deep": {"target_field": "value-2"}},
                ]
            }
        });
        let follow_up = link("custom_get", "target_field", "identifier");
        let args = extract_args("custom_list", &payload, &follow_up);
        assert_eq!(projected(&args, "identifier"), ["value-1", "value-2"]);
    }

    #[test]
    fn generic_fallback_returns_empty_when_projection_missing() {
        let payload = json!({"x": 1});
        // Projection settings are left at whatever `Default` provides.
        let follow_up = FollowUpLink {
            tool: "next".into(),
            probability: 1.0,
            ..FollowUpLink::default()
        };
        let args = extract_args("prev", &payload, &follow_up);
        assert_eq!(args, Vec::<Value>::new());
    }

    #[test]
    fn extract_host_strips_scheme_and_path() {
        assert_eq!(
            extract_host("https://api.github.com/repos/x/y").as_deref(),
            Some("api.github.com")
        );
        assert_eq!(
            extract_host("https://gitlab.example.com/project/-/issues").as_deref(),
            Some("gitlab.example.com")
        );
    }

    #[test]
    fn extract_host_lowercases_and_drops_port() {
        assert_eq!(
            extract_host("http://Example.COM:8080/foo").as_deref(),
            Some("example.com")
        );
        assert_eq!(
            extract_host("https://API.OPENAI.COM").as_deref(),
            Some("api.openai.com")
        );
    }

    #[test]
    fn extract_host_handles_userinfo() {
        assert_eq!(
            extract_host("https://user:pass@host.example.org/x").as_deref(),
            Some("host.example.org")
        );
        assert_eq!(
            extract_host("ftp://anonymous@ftp.example.org").as_deref(),
            Some("ftp.example.org")
        );
    }

    #[test]
    fn extract_host_keeps_ipv6_brackets() {
        assert_eq!(extract_host("https://[::1]:80/p").as_deref(), Some("[::1]"));
        assert_eq!(
            extract_host("http://[2001:db8::1]/foo").as_deref(),
            Some("[2001:db8::1]")
        );
    }

    #[test]
    fn extract_host_returns_none_for_non_urls() {
        for input in ["/local/path", "just-a-string", "", "https://"] {
            assert_eq!(extract_host(input), None);
        }
    }

    #[test]
    fn extract_host_strips_query_and_fragment() {
        assert_eq!(
            extract_host("https://example.com/foo?bar=1&baz=2").as_deref(),
            Some("example.com")
        );
        assert_eq!(
            extract_host("https://example.com#anchor").as_deref(),
            Some("example.com")
        );
    }

    #[test]
    fn unknown_chain_falls_through_to_generic() {
        let payload = json!({"items": [{"key": "k1"}, {"key": "k2"}]});
        let args = extract_args("produce", &payload, &link("consume", "key", "name"));
        assert_eq!(projected(&args, "name"), ["k1", "k2"]);
    }
}