1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
//! Glob Pattern Matching Tool
//!
//! Find files matching glob patterns.
use super::error::{Result, ToolError};
use super::r#trait::{Tool, ToolCapability, ToolExecutionContext, ToolResult};
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::path::PathBuf;
/// Glob pattern matching tool.
///
/// Stateless unit struct: all behaviour lives in its `Tool` implementation,
/// which walks a base directory and lists the paths matching a glob pattern.
pub struct GlobTool;
/// Deserialized arguments for a single `glob` tool invocation.
///
/// Only `pattern` is required; every other field is marked `#[serde(default)]`
/// and falls back to its `Default` value when absent from the JSON input.
#[derive(Debug, Deserialize, Serialize)]
struct GlobInput {
/// Glob pattern to match. It is tested against each walked path made
/// relative to the base directory.
pattern: String,
/// Base directory for search (defaults to the execution context's working directory)
#[serde(default)]
base_dir: Option<String>,
/// Maximum number of results to return. The walk stops as soon as this many
/// matches have been collected; `None` means no explicit result cap.
#[serde(default)]
limit: Option<usize>,
/// Include hidden files. When `false`, matches whose final path component
/// starts with `.` are dropped; dot-directories are still descended into.
#[serde(default)]
include_hidden: bool,
}
#[async_trait]
impl Tool for GlobTool {
fn name(&self) -> &str {
"glob"
}
fn description(&self) -> &str {
"Find files matching a glob pattern. Supports wildcards: * (any chars), ** (recursive directories), ? (single char), [abc] (char class)."
}
fn input_schema(&self) -> Value {
serde_json::json!({
"type": "object",
"properties": {
"pattern": {
"type": "string",
"description": "Glob pattern (e.g., '**/*.rs', 'src/**/*.test.js', '*.{md,txt}')"
},
"base_dir": {
"type": "string",
"description": "Base directory for search (defaults to working directory)"
},
"limit": {
"type": "integer",
"description": "Maximum number of results to return",
"minimum": 1
},
"include_hidden": {
"type": "boolean",
"description": "Include hidden files (starting with .)",
"default": false
}
},
"required": ["pattern"]
})
}
fn capabilities(&self) -> Vec<ToolCapability> {
vec![ToolCapability::ReadFiles]
}
fn requires_approval(&self) -> bool {
false // Pattern matching is safe
}
fn validate_input(&self, input: &Value) -> Result<()> {
let input: GlobInput = serde_json::from_value(input.clone())
.map_err(|e| ToolError::InvalidInput(format!("Invalid input: {}", e)))?;
if input.pattern.trim().is_empty() {
return Err(ToolError::InvalidInput(
"Pattern cannot be empty".to_string(),
));
}
Ok(())
}
async fn execute(&self, input: Value, context: &ToolExecutionContext) -> Result<ToolResult> {
let input: GlobInput = serde_json::from_value(input)?;
// Resolve base directory (tilde expansion + absolute/relative resolution).
let base_dir = if let Some(ref dir) = input.base_dir {
super::error::resolve_tool_path(dir, &context.working_dir())
} else {
context.working_dir()
};
if !base_dir.exists() {
return Ok(ToolResult::error(format!(
"Base directory does not exist: {}",
base_dir.display()
)));
}
// Compile the pattern once and match it against each walked path,
// rather than letting the `glob` crate drive the filesystem walk. The
// crate's walk follows symlinks (a loop hangs forever), has no time or
// entry bound, and runs synchronously on the async executor — a `**`
// from a large home dir once hung for 16+ minutes in production.
let pattern = glob::Pattern::new(&input.pattern)
.map_err(|e| ToolError::InvalidInput(format!("Invalid glob pattern: {}", e)))?;
let include_hidden = input.include_hidden;
let limit = input.limit;
let base = base_dir.clone();
// Hard bounds so a pathological tree can't hang the agent.
const MAX_ENTRIES: usize = 500_000;
const WALK_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
// The walk is blocking — run it off the async executor.
let walk = tokio::task::spawn_blocking(move || {
use ignore::WalkBuilder;
let mut builder = WalkBuilder::new(&base);
builder
// NEVER follow symlinks — this is the loop that hung for minutes.
.follow_links(false)
// Descend into dotdirs (the target may live under ~/.opencrabs),
// but disable all gitignore handling so a `.gitignore` (e.g. the
// `*` one in ~/.opencrabs) doesn't hide the very files we seek.
.hidden(false)
.git_ignore(false)
.git_global(false)
.git_exclude(false)
.ignore(false)
.parents(false)
// Prune notorious heavyweight dirs that blow up walk time and are
// never what a file-find wants.
.filter_entry(|e| {
!matches!(
e.file_name().to_str(),
Some("node_modules" | ".git" | "target" | ".cache")
)
});
let mut matches: Vec<PathBuf> = Vec::new();
let mut scanned = 0usize;
let mut truncated = false;
for dent in builder.build() {
scanned += 1;
if scanned > MAX_ENTRIES {
truncated = true;
break;
}
let dent = match dent {
Ok(d) => d,
Err(e) => {
tracing::warn!("glob: error reading entry: {}", e);
continue;
}
};
let path = dent.path();
let rel = path.strip_prefix(&base).unwrap_or(path);
if !pattern.matches_path(rel) {
continue;
}
// Preserve the original "skip dot-FILES unless include_hidden"
// behaviour (filters the final component only, not parent dirs).
if !include_hidden
&& path
.file_name()
.and_then(|n| n.to_str())
.map(|s| s.starts_with('.'))
.unwrap_or(false)
{
continue;
}
matches.push(path.to_path_buf());
if let Some(limit) = limit
&& matches.len() >= limit
{
break;
}
}
(matches, truncated)
});
let (mut matches, truncated) = match tokio::time::timeout(WALK_TIMEOUT, walk).await {
Ok(Ok(result)) => result,
Ok(Err(e)) => {
return Ok(ToolResult::error(format!("glob walk failed: {e}")));
}
Err(_) => {
return Ok(ToolResult::error(format!(
"glob timed out after {}s scanning '{}'. The tree is too large — \
narrow the pattern or pass a more specific `base_dir`.",
WALK_TIMEOUT.as_secs(),
base_dir.display()
)));
}
};
if matches.is_empty() {
return Ok(ToolResult::success(format!(
"No files found matching pattern: {}",
input.pattern
)));
}
// Sort matches for consistent output
matches.sort();
// Format output
let mut output = format!(
"Found {} files matching '{}':\n\n",
matches.len(),
input.pattern
);
for path in &matches {
// Make path relative to base_dir for cleaner output
let display_path = path
.strip_prefix(&base_dir)
.unwrap_or(path)
.display()
.to_string();
output.push_str(&format!(" {}\n", display_path));
}
if let Some(limit) = input.limit
&& matches.len() >= limit
{
output.push_str(&format!("\n(Limited to {} results)", limit));
}
if truncated {
output.push_str(&format!(
"\n(Stopped after scanning {MAX_ENTRIES} entries — tree too large; \
narrow the pattern or pass a more specific base_dir)"
));
}
Ok(ToolResult::success(output))
}
}