double_o/pattern/toml.rs
1use regex::Regex;
2use regex::RegexBuilder;
3use serde::Deserialize;
4use std::path::Path;
5
6use super::{FailurePattern, FailureStrategy, Pattern, SuccessPattern, SuccessStrategy};
7use crate::error::Error;
8
9// ---------------------------------------------------------------------------
10// Regex validation limits
11// ---------------------------------------------------------------------------
12
/// Maximum allowed length for user-provided regex patterns.
///
/// `validate_and_compile_regex` rejects any pattern longer than this before
/// attempting to compile it, which keeps overly long (and therefore likely
/// overly complex) user input away from the regex compiler.
const MAX_REGEX_LENGTH: usize = 500;
18
/// Size limit for regex compilation (in bytes).
///
/// Passed to `RegexBuilder::size_limit` by `validate_and_compile_regex` so that
/// pathological patterns cannot consume excessive memory when compiled.
/// Set to 100 KB - intended to be ample for all reasonable patterns.
const REGEX_SIZE_LIMIT: usize = 100 * 1024; // 100 KB
24
25/// Validate and compile a user-provided regex string with safety limits.
26///
27/// This function checks that the regex string is not overly long and compiles
28/// it with a reasonable size limit to prevent resource exhaustion issues.
29///
30/// # Arguments
31///
32/// * `pattern` - The regex pattern string to compile
33///
34/// # Errors
35///
36/// Returns `Error::Pattern` if the regex is too long or fails to compile.
37fn validate_and_compile_regex(pattern: &str) -> Result<Regex, Error> {
38 if pattern.len() > MAX_REGEX_LENGTH {
39 return Err(Error::Pattern(format!(
40 "regex too long ({} > {} chars)",
41 pattern.len(),
42 MAX_REGEX_LENGTH
43 )));
44 }
45
46 RegexBuilder::new(pattern)
47 .size_limit(REGEX_SIZE_LIMIT)
48 .build()
49 .map_err(|e| Error::Pattern(format!("regex compilation failed: {e}")))
50}
51
// ---------------------------------------------------------------------------
// TOML deserialization types
// ---------------------------------------------------------------------------
59
/// TOML representation of a pattern file.
///
/// This struct deserializes from user-defined TOML pattern files
/// loaded from `~/.config/oo/patterns/`. Each file defines a single pattern:
/// a mandatory `command_match` regex plus optional `[success]` and
/// `[failure]` sections.
#[derive(Deserialize)]
pub struct PatternFile {
    /// Regex that matches the command line.
    pub command_match: String,

    /// Optional success pattern configuration (the `[success]` table).
    pub success: Option<SuccessSection>,

    /// Optional failure pattern configuration (the `[failure]` table).
    pub failure: Option<FailureSection>,
}
76
/// TOML configuration for success output extraction.
///
/// Supports both the legacy pattern+summary format and the newer
/// strategy-based format. Which fields are required depends on `strategy`.
#[derive(Deserialize)]
pub struct SuccessSection {
    /// Strategy name: "regex" (legacy), "tail", "head", or "grep".
    /// Treated as "regex" by `parse_pattern_str` when omitted.
    #[serde(default)]
    pub(crate) strategy: Option<String>,

    /// Regex pattern with named capture groups (TOML key `pattern`).
    /// Required by the "regex" strategy; the "grep" strategy reads `grep` instead.
    #[serde(rename = "pattern")]
    pub(crate) success_pattern: Option<String>,

    /// Summary template with {name} placeholders (required by the "regex" strategy).
    pub(crate) summary: Option<String>,

    /// Number of lines (for tail/head strategies).
    /// `parse_pattern_str` falls back to 30 for "tail" and 20 for "head".
    pub(crate) lines: Option<usize>,

    /// Grep pattern (TOML key `grep`, required by the "grep" strategy).
    #[serde(rename = "grep")]
    pub(crate) grep_pattern: Option<String>,
}
100
/// TOML configuration for failure output filtering.
///
/// Defines how to extract relevant error information from failed command output.
/// Multiple strategies are supported: tail, head, grep, and between.
#[derive(Deserialize)]
pub struct FailureSection {
    /// Strategy name: "tail", "head", "grep", or "between".
    /// Treated as "tail" by `parse_pattern_str` when omitted.
    pub(crate) strategy: Option<String>,

    /// Number of lines (for tail/head strategies).
    /// `parse_pattern_str` falls back to 30 for "tail" and 20 for "head".
    pub(crate) lines: Option<usize>,

    /// Grep pattern (TOML key `grep`, required by the "grep" strategy).
    #[serde(rename = "grep")]
    pub(crate) grep_pattern: Option<String>,

    /// Start delimiter (required by the "between" strategy).
    pub(crate) start: Option<String>,

    /// End delimiter (required by the "between" strategy).
    pub(crate) end: Option<String>,
}
123
124// ---------------------------------------------------------------------------
125// User patterns (TOML on disk)
126// ---------------------------------------------------------------------------
127
128/// Load user-defined patterns from a directory of TOML files.
129///
130/// Invalid files are silently skipped.
131pub fn load_user_patterns(dir: &Path) -> Vec<Pattern> {
132 let entries = match std::fs::read_dir(dir) {
133 Ok(e) => e,
134 Err(_) => return Vec::new(),
135 };
136
137 let mut patterns = Vec::new();
138 for entry in entries.flatten() {
139 let path = entry.path();
140 if path.extension().is_some_and(|e| e == "toml") {
141 if let Ok(p) = load_pattern_file(&path) {
142 patterns.push(p);
143 }
144 }
145 }
146 patterns
147}
148
149fn load_pattern_file(path: &Path) -> Result<Pattern, Error> {
150 let content =
151 std::fs::read_to_string(path).map_err(|e| Error::Pattern(format!("{path:?}: {e}")))?;
152 parse_pattern_str(&content).map_err(|e| {
153 // Add file path context to any parse errors
154 if let Error::Pattern(msg) = e {
155 Error::Pattern(format!("{path:?}: {msg}"))
156 } else {
157 e
158 }
159 })
160}
161
162/// Parse a pattern definition from TOML string content.
163///
164/// Deserializes a TOML pattern definition into a `Pattern` struct,
165/// validating regex patterns and strategy configurations.
166///
167/// # Arguments
168///
169/// * `content` - TOML-formatted pattern definition
170///
171/// # Returns
172///
173/// A `Pattern` struct if parsing and validation succeed, or an `Error`
174/// if TOML is malformed, regex is invalid, or strategy configuration is incomplete.
175///
176/// # Errors
177///
178/// Returns `Error::Pattern` for:
179/// - TOML parsing failures
180/// - Invalid regular expressions
181/// - Missing required fields (e.g., grep pattern for grep strategy)
182/// - Unknown strategy names
183/// - Regex patterns exceeding maximum length (500 characters)
184///
185/// # Examples
186///
187/// ```
188/// use double_o::pattern::parse_pattern_str;
189///
190/// let toml = r#"
191/// command_match = "myapp test"
192///
193/// [success]
194/// pattern = "(?P<passed>\\d+) passed"
195/// summary = "{passed} tests passed"
196/// "#;
197/// let pattern = parse_pattern_str(toml).unwrap();
198/// ```
199pub fn parse_pattern_str(content: &str) -> Result<Pattern, Error> {
200 let pf: PatternFile =
201 toml::from_str(content).map_err(|e| Error::Pattern(format!("TOML parse: {e}")))?;
202
203 // Validate and compile command_match regex with safety limits
204 let command_match = validate_and_compile_regex(&pf.command_match)?;
205
206 let success = pf
207 .success
208 .map(|s| -> Result<SuccessPattern, Error> {
209 // Determine strategy: explicit strategy field, or default to "regex" for legacy format
210 let strategy = match s.strategy.as_deref().unwrap_or("regex") {
211 "tail" => SuccessStrategy::Tail {
212 lines: s.lines.unwrap_or(30),
213 },
214 "head" => SuccessStrategy::Head {
215 lines: s.lines.unwrap_or(20),
216 },
217 "grep" => {
218 let pat = s.grep_pattern.ok_or_else(|| {
219 Error::Pattern("grep strategy requires 'grep' field".into())
220 })?;
221 let pattern = validate_and_compile_regex(&pat)?;
222 SuccessStrategy::Grep { pattern }
223 }
224 "regex" => {
225 // Legacy format: pattern + summary
226 let pattern = s.success_pattern.ok_or_else(|| {
227 Error::Pattern("regex strategy requires 'pattern' field".into())
228 })?;
229 let summary = s.summary.ok_or_else(|| {
230 Error::Pattern("regex strategy requires 'summary' field".into())
231 })?;
232 let regex = validate_and_compile_regex(&pattern)?;
233 SuccessStrategy::Regex {
234 pattern: regex,
235 summary,
236 }
237 }
238 other => {
239 return Err(Error::Pattern(format!("unknown success strategy: {other}")));
240 }
241 };
242 Ok(SuccessPattern { strategy })
243 })
244 .transpose()?;
245
246 let failure = pf
247 .failure
248 .map(|f| -> Result<FailurePattern, Error> {
249 let strategy = match f.strategy.as_deref().unwrap_or("tail") {
250 "tail" => FailureStrategy::Tail {
251 lines: f.lines.unwrap_or(30),
252 },
253 "head" => FailureStrategy::Head {
254 lines: f.lines.unwrap_or(20),
255 },
256 "grep" => {
257 let pat = f.grep_pattern.ok_or_else(|| {
258 Error::Pattern("grep strategy requires 'grep' field".into())
259 })?;
260 let pattern = validate_and_compile_regex(&pat)?;
261 FailureStrategy::Grep { pattern }
262 }
263 "between" => {
264 let start = f.start.ok_or_else(|| {
265 Error::Pattern("between strategy requires 'start'".into())
266 })?;
267 let end = f
268 .end
269 .ok_or_else(|| Error::Pattern("between strategy requires 'end'".into()))?;
270 FailureStrategy::Between { start, end }
271 }
272 other => {
273 return Err(Error::Pattern(format!("unknown strategy: {other}")));
274 }
275 };
276 Ok(FailurePattern { strategy })
277 })
278 .transpose()?;
279
280 Ok(Pattern {
281 command_match,
282 success,
283 failure,
284 })
285}
286
287/// Validate all regexes in a TOML pattern string with safety limits.
288///
289/// This is used by the learn module to ensure LLM-generated patterns
290/// pass the same validation as manually-written TOML patterns.
291///
292/// # Errors
293///
294/// Returns `Error::Pattern` if TOML is malformed, regex is invalid,
295/// or strategy configuration is incomplete.
296pub fn validate_pattern_regexes(toml_str: &str) -> Result<(), Error> {
297 #[derive(Deserialize)]
298 struct Check {
299 command_match: String,
300 #[serde(default)]
301 success: Option<SuccessSection>,
302 #[serde(default)]
303 failure: Option<FailureSection>,
304 }
305
306 let check: Check =
307 toml::from_str(toml_str).map_err(|e| Error::Pattern(format!("TOML parse: {e}")))?;
308
309 // Validate command_match regex
310 validate_and_compile_regex(&check.command_match)?;
311
312 // Validate success regex if present
313 if let Some(ref s) = check.success {
314 match s.strategy.as_deref().unwrap_or("regex") {
315 "tail" | "head" => {} // no regex to validate
316 "grep" => {
317 let pat = s
318 .grep_pattern
319 .as_ref()
320 .ok_or_else(|| Error::Pattern("grep strategy requires 'grep' field".into()))?;
321 if pat.is_empty() {
322 return Err(Error::Pattern("grep regex must not be empty".into()));
323 }
324 validate_and_compile_regex(pat)?;
325 }
326 "regex" => {
327 let pattern = s.success_pattern.as_ref().ok_or_else(|| {
328 Error::Pattern("regex strategy requires 'pattern' field".into())
329 })?;
330 validate_and_compile_regex(pattern)?;
331 }
332 other => return Err(Error::Pattern(format!("unknown success strategy: {other}"))),
333 }
334 }
335
336 // Validate failure regex if present
337 if let Some(ref f) = check.failure {
338 match f.strategy.as_deref().unwrap_or("tail") {
339 "tail" | "head" => {} // no regex to validate
340 "grep" => {
341 let pat = f
342 .grep_pattern
343 .as_ref()
344 .ok_or_else(|| Error::Pattern("grep strategy requires 'grep' field".into()))?;
345 if pat.is_empty() {
346 return Err(Error::Pattern("grep regex must not be empty".into()));
347 }
348 validate_and_compile_regex(pat)?;
349 }
350 "between" => {
351 let start = f.start.as_ref().ok_or_else(|| {
352 Error::Pattern("between strategy requires 'start' field".into())
353 })?;
354 let end = f.end.as_ref().ok_or_else(|| {
355 Error::Pattern("between strategy requires 'end' field".into())
356 })?;
357 if start.is_empty() {
358 return Err(Error::Pattern("between 'start' must not be empty".into()));
359 }
360 if end.is_empty() {
361 return Err(Error::Pattern("between 'end' must not be empty".into()));
362 }
363 validate_and_compile_regex(start)?;
364 validate_and_compile_regex(end)?;
365 }
366 other => return Err(Error::Pattern(format!("unknown failure strategy: {other}"))),
367 }
368 }
369
370 Ok(())
371}