Skip to main content

double_o/pattern/
toml.rs

1use regex::Regex;
2use regex::RegexBuilder;
3use serde::Deserialize;
4use std::path::Path;
5
6use super::{FailurePattern, FailureStrategy, Pattern, SuccessPattern, SuccessStrategy};
7use crate::error::Error;
8
9// ---------------------------------------------------------------------------
10// Regex validation limits
11// ---------------------------------------------------------------------------
12
/// Maximum allowed length for user-provided regex patterns.
///
/// `validate_and_compile_regex` rejects any pattern longer than this before
/// attempting to compile it, so oversized patterns never reach the regex
/// compiler.
const MAX_REGEX_LENGTH: usize = 500;

/// Size limit for regex compilation (in bytes).
///
/// Passed to `RegexBuilder::size_limit` by `validate_and_compile_regex`, so a
/// pattern whose compiled form would exceed this budget fails to build instead
/// of consuming excessive memory.
/// Set to 100 KB - ample for all reasonable patterns.
const REGEX_SIZE_LIMIT: usize = 100 * 1024; // 100 KB
24
25/// Validate and compile a user-provided regex string with safety limits.
26///
27/// This function checks that the regex string is not overly long and compiles
28/// it with a reasonable size limit to prevent resource exhaustion issues.
29///
30/// # Arguments
31///
32/// * `pattern` - The regex pattern string to compile
33///
34/// # Errors
35///
36/// Returns `Error::Pattern` if the regex is too long or fails to compile.
37fn validate_and_compile_regex(pattern: &str) -> Result<Regex, Error> {
38    if pattern.len() > MAX_REGEX_LENGTH {
39        return Err(Error::Pattern(format!(
40            "regex too long ({} > {} chars)",
41            pattern.len(),
42            MAX_REGEX_LENGTH
43        )));
44    }
45
46    RegexBuilder::new(pattern)
47        .size_limit(REGEX_SIZE_LIMIT)
48        .build()
49        .map_err(|e| Error::Pattern(format!("regex compilation failed: {e}")))
50}
51
// ---------------------------------------------------------------------------
// TOML deserialization types
// ---------------------------------------------------------------------------
59
/// TOML representation of a pattern file.
///
/// This struct deserializes from user-defined TOML pattern files
/// loaded from `~/.config/oo/patterns/`. Each file defines a single pattern:
/// a required `command_match` regex plus optional `[success]` and `[failure]`
/// tables. A table that is absent from the file deserializes to `None`.
#[derive(Deserialize)]
pub struct PatternFile {
    /// Regex that matches the command line.
    ///
    /// Stored as the raw string from the file; it is compiled (with length and
    /// size limits) when the file is converted into a `Pattern`.
    pub command_match: String,

    /// Optional success pattern configuration (the `[success]` table).
    pub success: Option<SuccessSection>,

    /// Optional failure pattern configuration (the `[failure]` table).
    pub failure: Option<FailureSection>,
}
76
/// TOML configuration for success output extraction.
///
/// Supports both legacy pattern+summary format and new strategy-based format.
/// Which of the optional fields are required depends on the selected strategy;
/// fields that a strategy does not use are ignored.
#[derive(Deserialize)]
pub struct SuccessSection {
    /// Strategy name: "regex" (legacy), "tail", "head", or "grep".
    ///
    /// When omitted, the section is treated as the legacy "regex" format.
    #[serde(default)]
    pub(crate) strategy: Option<String>,

    /// Regex pattern with named capture groups (TOML key `pattern`).
    ///
    /// Required by the "regex" strategy. The "grep" strategy does not read
    /// this field; it uses `grep` instead.
    #[serde(rename = "pattern")]
    pub(crate) success_pattern: Option<String>,

    /// Summary template with {name} placeholders (required by the "regex" strategy).
    pub(crate) summary: Option<String>,

    /// Number of lines (for tail/head strategies).
    ///
    /// Defaults to 30 for "tail" and 20 for "head" when omitted.
    pub(crate) lines: Option<usize>,

    /// Grep pattern (TOML key `grep`; required by the "grep" strategy).
    #[serde(rename = "grep")]
    pub(crate) grep_pattern: Option<String>,
}
100
/// TOML configuration for failure output filtering.
///
/// Defines how to extract relevant error information from failed command output.
/// Multiple strategies are supported: tail, head, grep, and between.
/// Which of the optional fields are required depends on the selected strategy.
#[derive(Deserialize)]
pub struct FailureSection {
    /// Strategy name: "tail", "head", "grep", or "between".
    ///
    /// When omitted, "tail" is used.
    pub(crate) strategy: Option<String>,

    /// Number of lines (for tail/head strategies).
    ///
    /// Defaults to 30 for "tail" and 20 for "head" when omitted.
    pub(crate) lines: Option<usize>,

    /// Grep pattern (TOML key `grep`; required by the "grep" strategy).
    #[serde(rename = "grep")]
    pub(crate) grep_pattern: Option<String>,

    /// Start delimiter (required by the "between" strategy).
    pub(crate) start: Option<String>,

    /// End delimiter (required by the "between" strategy).
    pub(crate) end: Option<String>,
}
123
124// ---------------------------------------------------------------------------
125// User patterns (TOML on disk)
126// ---------------------------------------------------------------------------
127
128/// Load user-defined patterns from a directory of TOML files.
129///
130/// Invalid files are silently skipped.
131pub fn load_user_patterns(dir: &Path) -> Vec<Pattern> {
132    let entries = match std::fs::read_dir(dir) {
133        Ok(e) => e,
134        Err(_) => return Vec::new(),
135    };
136
137    let mut patterns = Vec::new();
138    for entry in entries.flatten() {
139        let path = entry.path();
140        if path.extension().is_some_and(|e| e == "toml") {
141            if let Ok(p) = load_pattern_file(&path) {
142                patterns.push(p);
143            }
144        }
145    }
146    patterns
147}
148
149fn load_pattern_file(path: &Path) -> Result<Pattern, Error> {
150    let content =
151        std::fs::read_to_string(path).map_err(|e| Error::Pattern(format!("{path:?}: {e}")))?;
152    parse_pattern_str(&content).map_err(|e| {
153        // Add file path context to any parse errors
154        if let Error::Pattern(msg) = e {
155            Error::Pattern(format!("{path:?}: {msg}"))
156        } else {
157            e
158        }
159    })
160}
161
162/// Parse a pattern definition from TOML string content.
163///
164/// Deserializes a TOML pattern definition into a `Pattern` struct,
165/// validating regex patterns and strategy configurations.
166///
167/// # Arguments
168///
169/// * `content` - TOML-formatted pattern definition
170///
171/// # Returns
172///
173/// A `Pattern` struct if parsing and validation succeed, or an `Error`
174/// if TOML is malformed, regex is invalid, or strategy configuration is incomplete.
175///
176/// # Errors
177///
178/// Returns `Error::Pattern` for:
179/// - TOML parsing failures
180/// - Invalid regular expressions
181/// - Missing required fields (e.g., grep pattern for grep strategy)
182/// - Unknown strategy names
183/// - Regex patterns exceeding maximum length (500 characters)
184///
185/// # Examples
186///
187/// ```
188/// use double_o::pattern::parse_pattern_str;
189///
190/// let toml = r#"
191/// command_match = "myapp test"
192///
193/// [success]
194/// pattern = "(?P<passed>\\d+) passed"
195/// summary = "{passed} tests passed"
196/// "#;
197/// let pattern = parse_pattern_str(toml).unwrap();
198/// ```
199pub fn parse_pattern_str(content: &str) -> Result<Pattern, Error> {
200    let pf: PatternFile =
201        toml::from_str(content).map_err(|e| Error::Pattern(format!("TOML parse: {e}")))?;
202
203    // Validate and compile command_match regex with safety limits
204    let command_match = validate_and_compile_regex(&pf.command_match)?;
205
206    let success = pf
207        .success
208        .map(|s| -> Result<SuccessPattern, Error> {
209            // Determine strategy: explicit strategy field, or default to "regex" for legacy format
210            let strategy = match s.strategy.as_deref().unwrap_or("regex") {
211                "tail" => SuccessStrategy::Tail {
212                    lines: s.lines.unwrap_or(30),
213                },
214                "head" => SuccessStrategy::Head {
215                    lines: s.lines.unwrap_or(20),
216                },
217                "grep" => {
218                    let pat = s.grep_pattern.ok_or_else(|| {
219                        Error::Pattern("grep strategy requires 'grep' field".into())
220                    })?;
221                    let pattern = validate_and_compile_regex(&pat)?;
222                    SuccessStrategy::Grep { pattern }
223                }
224                "regex" => {
225                    // Legacy format: pattern + summary
226                    let pattern = s.success_pattern.ok_or_else(|| {
227                        Error::Pattern("regex strategy requires 'pattern' field".into())
228                    })?;
229                    let summary = s.summary.ok_or_else(|| {
230                        Error::Pattern("regex strategy requires 'summary' field".into())
231                    })?;
232                    let regex = validate_and_compile_regex(&pattern)?;
233                    SuccessStrategy::Regex {
234                        pattern: regex,
235                        summary,
236                    }
237                }
238                other => {
239                    return Err(Error::Pattern(format!("unknown success strategy: {other}")));
240                }
241            };
242            Ok(SuccessPattern { strategy })
243        })
244        .transpose()?;
245
246    let failure = pf
247        .failure
248        .map(|f| -> Result<FailurePattern, Error> {
249            let strategy = match f.strategy.as_deref().unwrap_or("tail") {
250                "tail" => FailureStrategy::Tail {
251                    lines: f.lines.unwrap_or(30),
252                },
253                "head" => FailureStrategy::Head {
254                    lines: f.lines.unwrap_or(20),
255                },
256                "grep" => {
257                    let pat = f.grep_pattern.ok_or_else(|| {
258                        Error::Pattern("grep strategy requires 'grep' field".into())
259                    })?;
260                    let pattern = validate_and_compile_regex(&pat)?;
261                    FailureStrategy::Grep { pattern }
262                }
263                "between" => {
264                    let start = f.start.ok_or_else(|| {
265                        Error::Pattern("between strategy requires 'start'".into())
266                    })?;
267                    let end = f
268                        .end
269                        .ok_or_else(|| Error::Pattern("between strategy requires 'end'".into()))?;
270                    FailureStrategy::Between { start, end }
271                }
272                other => {
273                    return Err(Error::Pattern(format!("unknown strategy: {other}")));
274                }
275            };
276            Ok(FailurePattern { strategy })
277        })
278        .transpose()?;
279
280    Ok(Pattern {
281        command_match,
282        success,
283        failure,
284    })
285}
286
287/// Validate all regexes in a TOML pattern string with safety limits.
288///
289/// This is used by the learn module to ensure LLM-generated patterns
290/// pass the same validation as manually-written TOML patterns.
291///
292/// # Errors
293///
294/// Returns `Error::Pattern` if TOML is malformed, regex is invalid,
295/// or strategy configuration is incomplete.
296pub fn validate_pattern_regexes(toml_str: &str) -> Result<(), Error> {
297    #[derive(Deserialize)]
298    struct Check {
299        command_match: String,
300        #[serde(default)]
301        success: Option<SuccessSection>,
302        #[serde(default)]
303        failure: Option<FailureSection>,
304    }
305
306    let check: Check =
307        toml::from_str(toml_str).map_err(|e| Error::Pattern(format!("TOML parse: {e}")))?;
308
309    // Validate command_match regex
310    validate_and_compile_regex(&check.command_match)?;
311
312    // Validate success regex if present
313    if let Some(ref s) = check.success {
314        match s.strategy.as_deref().unwrap_or("regex") {
315            "tail" | "head" => {} // no regex to validate
316            "grep" => {
317                let pat = s
318                    .grep_pattern
319                    .as_ref()
320                    .ok_or_else(|| Error::Pattern("grep strategy requires 'grep' field".into()))?;
321                if pat.is_empty() {
322                    return Err(Error::Pattern("grep regex must not be empty".into()));
323                }
324                validate_and_compile_regex(pat)?;
325            }
326            "regex" => {
327                let pattern = s.success_pattern.as_ref().ok_or_else(|| {
328                    Error::Pattern("regex strategy requires 'pattern' field".into())
329                })?;
330                validate_and_compile_regex(pattern)?;
331            }
332            other => return Err(Error::Pattern(format!("unknown success strategy: {other}"))),
333        }
334    }
335
336    // Validate failure regex if present
337    if let Some(ref f) = check.failure {
338        match f.strategy.as_deref().unwrap_or("tail") {
339            "tail" | "head" => {} // no regex to validate
340            "grep" => {
341                let pat = f
342                    .grep_pattern
343                    .as_ref()
344                    .ok_or_else(|| Error::Pattern("grep strategy requires 'grep' field".into()))?;
345                if pat.is_empty() {
346                    return Err(Error::Pattern("grep regex must not be empty".into()));
347                }
348                validate_and_compile_regex(pat)?;
349            }
350            "between" => {
351                let start = f.start.as_ref().ok_or_else(|| {
352                    Error::Pattern("between strategy requires 'start' field".into())
353                })?;
354                let end = f.end.as_ref().ok_or_else(|| {
355                    Error::Pattern("between strategy requires 'end' field".into())
356                })?;
357                if start.is_empty() {
358                    return Err(Error::Pattern("between 'start' must not be empty".into()));
359                }
360                if end.is_empty() {
361                    return Err(Error::Pattern("between 'end' must not be empty".into()));
362                }
363                validate_and_compile_regex(start)?;
364                validate_and_compile_regex(end)?;
365            }
366            other => return Err(Error::Pattern(format!("unknown failure strategy: {other}"))),
367        }
368    }
369
370    Ok(())
371}