Skip to main content

sqry_core/plugin/
safe_parse.rs

1//! Safe parsing utilities with resource limits.
2//!
3//! This module provides a centralized, secure parser utility that enforces
4//! input size limits, parse timeouts, and supports external cancellation.
5//! All language plugins should use `SafeParser` to prevent OOM vulnerabilities
6//! from pathological inputs.
7//!
8//! # Security Background
9//!
10//! Tree-sitter parsers can consume unbounded memory when encountering malformed
11//! input that triggers exponential backtracking in error recovery. A 103-byte
12//! input can amplify to 2GB+ memory consumption (~20 million× amplification).
13//!
14//! # Usage
15//!
16//! ```ignore
17//! use sqry_core::plugin::safe_parse::{SafeParser, SafeParserConfig};
18//!
19//! let config = SafeParserConfig::default();
20//! let parser = SafeParser::new(config);
21//!
22//! let result = parser.parse(&language, content, Some(file_path));
23//! match result {
24//!     Ok(tree) => { /* use tree */ }
25//!     Err(ParseError::InputTooLarge { size, max, .. }) => {
26//!         log::warn!("File too large: {} bytes > {} limit", size, max);
27//!     }
28//!     Err(ParseError::ParseTimedOut { timeout_micros, .. }) => {
29//!         log::warn!("Parse timed out after {} ms", timeout_micros / 1000);
30//!     }
31//!     Err(e) => { /* handle other errors */ }
32//! }
33//! ```
34
35use std::ops::ControlFlow;
36use std::path::Path;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, Ordering};
39use std::time::Instant;
40use tree_sitter::{Language, ParseOptions, ParseState, Parser, Tree};
41
42use super::error::ParseError;
43
/// Default cap on input size: 10 MiB.
///
/// Roomy enough for most legitimate source files while still bounding how
/// much memory a single large file can demand. Generated or minified code
/// may exceed it and require the user to configure a higher limit.
pub const DEFAULT_MAX_SIZE: usize = 10 << 20;

/// Smallest input-size cap that may be configured: 1 MiB.
///
/// A floor on the configurable limit so that basic functionality is
/// preserved.
pub const MIN_MAX_SIZE: usize = 1 << 20;

/// Largest input-size cap that may be configured: 32 MiB.
///
/// A ceiling on the configurable limit so that extremely large files cannot
/// cause excessive memory usage.
pub const MAX_MAX_SIZE: usize = 32 << 20;
62
/// Default parse time budget: 2 seconds, expressed in microseconds.
///
/// Stops runaway parsing on pathological inputs that could trigger
/// exponential backtracking. Most legitimate files parse in <100ms.
pub const DEFAULT_TIMEOUT_MICROS: u64 = 2 * 1_000_000;

/// Shortest timeout that may be configured: 100ms, expressed in microseconds.
///
/// Anything shorter would cause false positives on normal files.
pub const MIN_TIMEOUT_MICROS: u64 = 100 * 1_000;

/// Longest timeout that may be configured: 5 seconds, expressed in microseconds.
///
/// Keeps the time spent on a pathological input within a reasonable bound.
pub const MAX_TIMEOUT_MICROS: u64 = 5 * 1_000_000;
80
81/// Configuration for `SafeParser` with bounded limits.
82///
83/// All limits are bounded to prevent users from disabling security protections.
84/// Values outside the allowed range are clamped to the nearest bound.
85///
86/// # Bounds
87///
88/// - `max_input_size`: [1 MiB, 32 MiB]
89/// - `timeout_micros`: [100,000 µs, 5,000,000 µs] (100ms to 5s)
90///
91/// # Example
92///
93/// ```
94/// use sqry_core::plugin::safe_parse::SafeParserConfig;
95///
96/// // Use defaults
97/// let config = SafeParserConfig::default();
98/// assert_eq!(config.max_input_size(), 10 * 1024 * 1024);
99/// assert_eq!(config.timeout_micros(), 2_000_000);
100///
101/// // Custom configuration (values are clamped to bounds)
102/// let config = SafeParserConfig::new()
103///     .with_max_input_size(20 * 1024 * 1024)
104///     .with_timeout_micros(3_000_000);
105/// ```
106#[derive(Debug, Clone)]
107pub struct SafeParserConfig {
108    max_input_size: usize,
109    timeout_micros: u64,
110}
111
112impl Default for SafeParserConfig {
113    fn default() -> Self {
114        Self {
115            max_input_size: DEFAULT_MAX_SIZE,
116            timeout_micros: DEFAULT_TIMEOUT_MICROS,
117        }
118    }
119}
120
121impl SafeParserConfig {
122    /// Create a new configuration with default values.
123    #[must_use]
124    pub fn new() -> Self {
125        Self::default()
126    }
127
128    /// Set maximum input size in bytes.
129    ///
130    /// Value is clamped to [1 MiB, 32 MiB].
131    #[must_use]
132    pub fn with_max_input_size(mut self, size: usize) -> Self {
133        self.max_input_size = size.clamp(MIN_MAX_SIZE, MAX_MAX_SIZE);
134        self
135    }
136
137    /// Set parse timeout in microseconds.
138    ///
139    /// Value is clamped to [100,000 µs, 5,000,000 µs].
140    #[must_use]
141    pub fn with_timeout_micros(mut self, timeout: u64) -> Self {
142        self.timeout_micros = timeout.clamp(MIN_TIMEOUT_MICROS, MAX_TIMEOUT_MICROS);
143        self
144    }
145
146    /// Get current maximum input size.
147    #[must_use]
148    pub fn max_input_size(&self) -> usize {
149        self.max_input_size
150    }
151
152    /// Get current timeout in microseconds.
153    #[must_use]
154    pub fn timeout_micros(&self) -> u64 {
155        self.timeout_micros
156    }
157}
158
/// Shared on/off signal used to abort long-running parse operations.
///
/// The state lives in an atomic boolean behind an `Arc`, so every clone of a
/// flag observes and controls the same value. The indexer can raise the flag
/// to proactively cancel parsing when needed (e.g., on shutdown, file
/// change, or resource pressure).
///
/// # Example
///
/// ```
/// use sqry_core::plugin::safe_parse::CancellationFlag;
///
/// let flag = CancellationFlag::new();
///
/// // Check if cancelled
/// assert!(!flag.is_cancelled());
///
/// // Signal cancellation
/// flag.cancel();
/// assert!(flag.is_cancelled());
///
/// // Reset for next file
/// flag.reset();
/// assert!(!flag.is_cancelled());
/// ```
#[derive(Debug, Clone, Default)]
pub struct CancellationFlag {
    /// `true` once cancellation has been requested; shared between clones.
    cancelled: Arc<AtomicBool>,
}

impl CancellationFlag {
    /// Build a fresh flag in the "not cancelled" state.
    #[must_use]
    pub fn new() -> Self {
        // The derived `Default` yields an `AtomicBool` holding `false`.
        Self::default()
    }

    /// Report whether cancellation has been requested.
    #[must_use]
    pub fn is_cancelled(&self) -> bool {
        self.cancelled.load(Ordering::Relaxed)
    }

    /// Request cancellation.
    pub fn cancel(&self) {
        self.set(true);
    }

    /// Clear a previous cancellation request.
    ///
    /// Call this between files to avoid leakage.
    pub fn reset(&self) {
        self.set(false);
    }

    /// Store `value` into the shared flag.
    fn set(&self, value: bool) {
        self.cancelled.store(value, Ordering::Relaxed);
    }
}
215
/// Why a parse attempt stopped, as tracked internally by this module.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum TerminationReason {
    /// Neither cancellation nor timeout applied: the parse either completed
    /// normally or failed for some other reason.
    None,
    /// The cancellation flag was set.
    Cancelled,
    /// The configured timeout was exceeded.
    TimedOut,
}
226
227/// Pure helper function for finalizing parse results with fail-closed behavior.
228///
229/// **SECURITY CRITICAL**: This function implements fail-closed semantics.
230/// If a termination reason was triggered (timeout or cancellation), we return
231/// an error regardless of whether tree-sitter produced a partial tree.
232///
233/// This function is extracted for deterministic testability. The parse outcome
234/// decision is pure (depends only on inputs) and can be tested without
235/// depending on timing or actual parsing.
236///
237/// # Arguments
238///
239/// * `termination_reason` - Why parsing terminated (`None`, `Cancelled`, `TimedOut`)
240/// * `tree` - The tree-sitter result (`Some` = tree produced, `None` = no tree)
241/// * `file` - Optional file path for error context
242/// * `timeout_micros` - Timeout value for error reporting
243///
244/// # Returns
245///
246/// * `Ok(Tree)` only if `termination_reason` is `None` AND tree is `Some`
247/// * `Err(ParseCancelled)` if `termination_reason` is `Cancelled` (regardless of tree)
248/// * `Err(ParseTimedOut)` if `termination_reason` is `TimedOut` (regardless of tree)
249/// * `Err(TreeSitterFailed)` if `termination_reason` is `None` but tree is `None`
250fn finalize_parse_result(
251    termination_reason: TerminationReason,
252    tree: Option<Tree>,
253    file: Option<&Path>,
254    timeout_micros: u64,
255) -> Result<Tree, ParseError> {
256    // SECURITY: Check termination reason FIRST, before checking if tree exists.
257    // Tree-sitter may return a partial tree even after timeout/cancellation.
258    // We must fail-closed: if timeout or cancellation was triggered, return an error
259    // regardless of whether a partial tree was produced.
260    match termination_reason {
261        TerminationReason::Cancelled => {
262            log::warn!(
263                "Parse cancelled{}",
264                file.map(|f| format!(" (file: {})", f.display()))
265                    .unwrap_or_default()
266            );
267            return Err(ParseError::ParseCancelled {
268                reason: "cancelled during parsing".to_string(),
269                file: file.map(Path::to_path_buf),
270            });
271        }
272        TerminationReason::TimedOut => {
273            log::warn!(
274                "Parse timed out after {} ms{}",
275                timeout_micros / 1000,
276                file.map(|f| format!(" (file: {})", f.display()))
277                    .unwrap_or_default()
278            );
279            return Err(ParseError::ParseTimedOut {
280                timeout_micros,
281                file: file.map(Path::to_path_buf),
282            });
283        }
284        TerminationReason::None => {
285            // No termination requested, proceed to check tree
286        }
287    }
288
289    // If no termination was requested, check if tree-sitter produced a tree
290    if let Some(t) = tree {
291        Ok(t)
292    } else {
293        log::warn!(
294            "Parse failed{}",
295            file.map(|f| format!(" (file: {})", f.display()))
296                .unwrap_or_default()
297        );
298        Err(ParseError::TreeSitterFailed)
299    }
300}
301
302/// Safe parser with resource limits and cancellation support.
303///
304/// `SafeParser` wraps tree-sitter parsing with:
305/// - Input size validation (prevents unbounded allocation)
306/// - Parse timeout (prevents exponential backtracking)
307/// - External cancellation (allows proactive abort)
308///
309/// All language plugins should use this utility instead of creating
310/// parsers directly to ensure consistent security policy.
311///
312/// # Thread Safety
313///
314/// `SafeParser` is `Send + Sync` but the underlying tree-sitter `Parser`
315/// is created per-call. This is intentional to avoid thread-safety issues
316/// with tree-sitter's internal state.
317///
318/// # Example
319///
320/// ```ignore
321/// use sqry_core::plugin::safe_parse::{SafeParser, SafeParserConfig};
322/// use tree_sitter_rust::LANGUAGE;
323///
324/// let parser = SafeParser::new(SafeParserConfig::default());
325/// let content = b"fn main() {}";
326///
327/// match parser.parse(&LANGUAGE.into(), content, None) {
328///     Ok(tree) => println!("Parsed {} nodes", tree.root_node().child_count()),
329///     Err(e) => eprintln!("Parse failed: {}", e),
330/// }
331/// ```
332#[derive(Debug, Clone)]
333pub struct SafeParser {
334    config: SafeParserConfig,
335    cancellation_flag: Option<CancellationFlag>,
336}
337
338impl Default for SafeParser {
339    fn default() -> Self {
340        Self::new(SafeParserConfig::default())
341    }
342}
343
344impl SafeParser {
345    /// Create a new safe parser with the given configuration.
346    #[must_use]
347    pub fn new(config: SafeParserConfig) -> Self {
348        Self {
349            config,
350            cancellation_flag: None,
351        }
352    }
353
354    /// Create a safe parser with default configuration.
355    #[must_use]
356    pub fn with_defaults() -> Self {
357        Self::default()
358    }
359
360    /// Set a cancellation flag for external abort signaling.
361    #[must_use]
362    pub fn with_cancellation_flag(mut self, flag: CancellationFlag) -> Self {
363        self.cancellation_flag = Some(flag);
364        self
365    }
366
367    /// Get the current configuration.
368    #[must_use]
369    pub fn config(&self) -> &SafeParserConfig {
370        &self.config
371    }
372
373    /// Parse source code with resource limits.
374    ///
375    /// # Arguments
376    ///
377    /// * `language` - Tree-sitter language to use
378    /// * `content` - Source code as bytes (UTF-8 encoded)
379    /// * `file` - Optional file path for error context
380    ///
381    /// # Returns
382    ///
383    /// Parsed tree-sitter AST on success.
384    ///
385    /// # Errors
386    ///
387    /// - `ParseError::InputTooLarge` - Input exceeds size limit
388    /// - `ParseError::ParseTimedOut` - Parsing exceeded timeout
389    /// - `ParseError::ParseCancelled` - Parsing was cancelled via flag
390    /// - `ParseError::LanguageSetFailed` - Failed to configure parser
391    /// - `ParseError::TreeSitterFailed` - Tree-sitter returned no tree
392    ///
393    /// # Performance
394    ///
395    /// Creates a new `Parser` per call. This is intentional:
396    /// - Avoids thread-safety issues with tree-sitter's state
397    /// - Parser creation is cheap (~1µs)
398    /// - Timeout/cancellation state is per-parse
399    ///
400    /// # Implementation Note
401    ///
402    /// Uses the direct `parser.parse()` API with `set_timeout_micros()` instead of
403    /// `parse_with_options()` with a chunked callback. The callback-based API has
404    /// compatibility issues with some grammars (e.g., tree-sitter-groovy on multi-
405    /// function files). The direct API works universally.
406    ///
407    /// While `set_timeout_micros` is deprecated in tree-sitter 0.25, it remains
408    /// functional and provides better grammar compatibility than the callback approach.
409    ///
410    /// # Cancellation Limitation
411    ///
412    /// Mid-parse cancellation is not supported with the direct API. Cancellation is
413    /// checked before and after parsing, but not during. For most source files (which
414    /// parse in <100ms), this is acceptable. For very large files, the timeout provides
415    /// protection.
416    ///
417    // DEPRECATION: We use `set_timeout_micros` because the recommended replacement,
418    // `parse_with_options` (with a callback), has proven to be incompatible with
419    // certain grammars (e.g., tree-sitter-groovy). This approach ensures universal
420    // grammar compatibility.
421    pub fn parse(
422        &self,
423        language: &Language,
424        content: &[u8],
425        file: Option<&Path>,
426    ) -> Result<Tree, ParseError> {
427        // Check cancellation before starting
428        if let Some(ref flag) = self.cancellation_flag
429            && flag.is_cancelled()
430        {
431            return Err(ParseError::ParseCancelled {
432                reason: "cancelled before parse started".to_string(),
433                file: file.map(Path::to_path_buf),
434            });
435        }
436
437        // Check input size limit
438        if content.len() > self.config.max_input_size {
439            log::warn!(
440                "Input too large: {} bytes exceeds {} limit{}",
441                content.len(),
442                self.config.max_input_size,
443                file.map(|f| format!(" (file: {})", f.display()))
444                    .unwrap_or_default()
445            );
446            return Err(ParseError::InputTooLarge {
447                size: content.len(),
448                max: self.config.max_input_size,
449                file: file.map(Path::to_path_buf),
450            });
451        }
452
453        // Create and configure parser
454        let mut parser = Parser::new();
455        parser
456            .set_language(language)
457            .map_err(|e| ParseError::LanguageSetFailed(e.to_string()))?;
458
459        // Track start time for timeout enforcement
460        let start_time = Instant::now();
461        let timeout_micros = self.config.timeout_micros;
462
463        // Clone the cancellation flag for use inside the progress callback.
464        // The underlying AtomicBool is shared via Arc, so this is cheap.
465        let cancellation_flag = self.cancellation_flag.clone();
466
467        // Set up timeout + cancellation via progress callback (tree-sitter 0.26+).
468        // set_timeout_micros was removed in tree-sitter 0.26; the progress_callback
469        // is now the canonical way to abort a long-running parse.
470        let mut progress_fn = move |_: &ParseState| -> ControlFlow<()> {
471            if let Some(ref flag) = cancellation_flag
472                && flag.is_cancelled()
473            {
474                return ControlFlow::Break(());
475            }
476            #[allow(clippy::cast_possible_truncation)]
477            // u64 holds 584+ years of µs; max timeout is 5s
478            if start_time.elapsed().as_micros() as u64 > timeout_micros {
479                ControlFlow::Break(())
480            } else {
481                ControlFlow::Continue(())
482            }
483        };
484        let options = ParseOptions::new().progress_callback(&mut progress_fn);
485
486        // Parse with timeout/cancellation enforcement via progress callback.
487        let tree = parser.parse_with_options(
488            &mut |i, _| content.get(i..).unwrap_or_default(),
489            None,
490            Some(options),
491        );
492
493        // Determine termination reason (fail-closed semantics)
494        // SECURITY: Check timeout FIRST using elapsed time, regardless of whether tree-sitter
495        // produced a tree. Tree-sitter may return partial trees even after timeout.
496        // We must fail-closed: if we exceeded timeout, return an error.
497        #[allow(clippy::cast_possible_truncation)] // u64 holds 584+ years of µs; timeout max is 5s
498        let elapsed_micros = start_time.elapsed().as_micros() as u64;
499        let termination_reason = if let Some(ref flag) = self.cancellation_flag
500            && flag.is_cancelled()
501        {
502            // Cancellation was requested (possibly during parse)
503            TerminationReason::Cancelled
504        } else if elapsed_micros > self.config.timeout_micros {
505            // Timeout occurred - fail-closed regardless of whether tree was produced
506            TerminationReason::TimedOut
507        } else if tree.is_none() && elapsed_micros >= self.config.timeout_micros {
508            // Edge case: tree-sitter aborted exactly at timeout boundary (returns None)
509            // Treat this as timeout rather than TreeSitterFailed for accurate telemetry
510            TerminationReason::TimedOut
511        } else {
512            TerminationReason::None
513        };
514
515        // Delegate to pure helper for fail-closed result handling
516        finalize_parse_result(termination_reason, tree, file, self.config.timeout_micros)
517    }
518
519    /// Parse source code with file path context.
520    ///
521    /// Convenience method that always includes file path in errors.
522    ///
523    /// # Errors
524    ///
525    /// Same as [`parse`](Self::parse).
526    pub fn parse_file(
527        &self,
528        language: &Language,
529        content: &[u8],
530        file: &Path,
531    ) -> Result<Tree, ParseError> {
532        self.parse(language, content, Some(file))
533    }
534
535    /// Log a summary of the current configuration.
536    ///
537    /// Call this once at startup to record active limits for incident triage.
538    #[allow(clippy::cast_precision_loss)] // max_input_size <= 32 MiB, well under f64 precision limit
539    pub fn log_config(&self) {
540        log::info!(
541            "SafeParser configured: max_size={} bytes ({:.1} MiB), timeout={} ms",
542            self.config.max_input_size,
543            self.config.max_input_size as f64 / (1024.0 * 1024.0),
544            self.config.timeout_micros / 1000
545        );
546    }
547}
548
549/// Parse content using the default safe parser configuration.
550///
551/// This is a convenience function for simple cases. For production use,
552/// prefer creating a `SafeParser` instance with explicit configuration.
553///
554/// # Errors
555///
556/// Same as [`SafeParser::parse`].
557pub fn parse_safe(
558    language: &Language,
559    content: &[u8],
560    file: Option<&Path>,
561) -> Result<Tree, ParseError> {
562    SafeParser::default().parse(language, content, file)
563}
564
565#[cfg(test)]
566mod tests {
567    use super::*;
568    use std::path::PathBuf;
569
570    #[test]
571    fn test_config_defaults() {
572        let config = SafeParserConfig::default();
573        assert_eq!(config.max_input_size(), DEFAULT_MAX_SIZE);
574        assert_eq!(config.timeout_micros(), DEFAULT_TIMEOUT_MICROS);
575    }
576
577    #[test]
578    fn test_config_builder() {
579        let config = SafeParserConfig::new()
580            .with_max_input_size(20 * 1024 * 1024)
581            .with_timeout_micros(3_000_000);
582
583        assert_eq!(config.max_input_size(), 20 * 1024 * 1024);
584        assert_eq!(config.timeout_micros(), 3_000_000);
585    }
586
587    #[test]
588    fn test_config_clamping_min() {
589        // Below minimum should clamp up
590        let config = SafeParserConfig::new()
591            .with_max_input_size(100) // Way below 1 MiB
592            .with_timeout_micros(1000); // Way below 100ms
593
594        assert_eq!(config.max_input_size(), MIN_MAX_SIZE);
595        assert_eq!(config.timeout_micros(), MIN_TIMEOUT_MICROS);
596    }
597
598    #[test]
599    fn test_config_clamping_max() {
600        // Above maximum should clamp down
601        let config = SafeParserConfig::new()
602            .with_max_input_size(100 * 1024 * 1024) // 100 MiB > 32 MiB max
603            .with_timeout_micros(10_000_000); // 10s > 5s max
604
605        assert_eq!(config.max_input_size(), MAX_MAX_SIZE);
606        assert_eq!(config.timeout_micros(), MAX_TIMEOUT_MICROS);
607    }
608
609    #[test]
610    fn test_cancellation_flag() {
611        let flag = CancellationFlag::new();
612
613        assert!(!flag.is_cancelled());
614
615        flag.cancel();
616        assert!(flag.is_cancelled());
617
618        flag.reset();
619        assert!(!flag.is_cancelled());
620    }
621
622    #[test]
623    fn test_cancellation_flag_clone() {
624        let flag1 = CancellationFlag::new();
625        let flag2 = flag1.clone();
626
627        flag1.cancel();
628        assert!(flag2.is_cancelled()); // Clone shares the same Arc
629    }
630
631    #[test]
632    fn test_safe_parser_creation() {
633        let parser = SafeParser::with_defaults();
634        assert_eq!(parser.config().max_input_size(), DEFAULT_MAX_SIZE);
635        assert_eq!(parser.config().timeout_micros(), DEFAULT_TIMEOUT_MICROS);
636    }
637
638    #[test]
639    fn test_safe_parser_with_config() {
640        let config = SafeParserConfig::new().with_max_input_size(5 * 1024 * 1024);
641        let parser = SafeParser::new(config);
642
643        assert_eq!(parser.config().max_input_size(), 5 * 1024 * 1024);
644    }
645
646    #[test]
647    fn test_safe_parser_with_cancellation() {
648        let flag = CancellationFlag::new();
649        let parser = SafeParser::with_defaults().with_cancellation_flag(flag.clone());
650
651        // Parser should have the flag
652        assert!(parser.cancellation_flag.is_some());
653    }
654
655    #[test]
656    fn test_input_too_large_error() {
657        // Create parser with tiny limit for testing
658        let config = SafeParserConfig::new().with_max_input_size(MIN_MAX_SIZE);
659        let parser = SafeParser::new(config);
660
661        // Content larger than 1 MiB
662        let large_content = vec![b'x'; MIN_MAX_SIZE + 1];
663
664        // Use a dummy language (we'll hit size check before parsing)
665        let language = tree_sitter_rust::LANGUAGE.into();
666        let result = parser.parse(&language, &large_content, None);
667
668        match result {
669            Err(ParseError::InputTooLarge { size, max, file }) => {
670                assert_eq!(size, MIN_MAX_SIZE + 1);
671                assert_eq!(max, MIN_MAX_SIZE);
672                assert!(file.is_none());
673            }
674            _ => panic!("Expected InputTooLarge error"),
675        }
676    }
677
678    #[test]
679    fn test_input_too_large_with_file() {
680        let config = SafeParserConfig::new().with_max_input_size(MIN_MAX_SIZE);
681        let parser = SafeParser::new(config);
682
683        let large_content = vec![b'x'; MIN_MAX_SIZE + 1];
684        let file_path = PathBuf::from("/path/to/large.rs");
685        let language = tree_sitter_rust::LANGUAGE.into();
686
687        let result = parser.parse_file(&language, &large_content, &file_path);
688
689        match result {
690            Err(ParseError::InputTooLarge { file, .. }) => {
691                assert_eq!(file, Some(file_path));
692            }
693            _ => panic!("Expected InputTooLarge error with file path"),
694        }
695    }
696
697    #[test]
698    fn test_cancelled_before_parse() {
699        let flag = CancellationFlag::new();
700        flag.cancel(); // Cancel before parsing
701
702        let parser = SafeParser::with_defaults().with_cancellation_flag(flag);
703
704        let content = b"fn main() {}";
705        let language = tree_sitter_rust::LANGUAGE.into();
706        let result = parser.parse(&language, content, None);
707
708        match result {
709            Err(ParseError::ParseCancelled { reason, .. }) => {
710                assert!(reason.contains("before parse started"));
711            }
712            _ => panic!("Expected ParseCancelled error"),
713        }
714    }
715
716    #[test]
717    fn test_successful_parse() {
718        let parser = SafeParser::with_defaults();
719        let content = b"fn main() {}";
720        let language = tree_sitter_rust::LANGUAGE.into();
721
722        let result = parser.parse(&language, content, None);
723        assert!(result.is_ok());
724
725        let tree = result.unwrap();
726        // Verify we got a valid tree by checking root node kind
727        assert_eq!(tree.root_node().kind(), "source_file");
728    }
729
730    #[test]
731    fn test_successful_parse_with_file() {
732        let parser = SafeParser::with_defaults();
733        let content = b"fn main() { let x = 42; }";
734        let file_path = PathBuf::from("test.rs");
735        let language = tree_sitter_rust::LANGUAGE.into();
736
737        let result = parser.parse_file(&language, content, &file_path);
738        assert!(result.is_ok());
739    }
740
741    #[test]
742    fn test_parse_safe_convenience() {
743        let content = b"fn foo() {}";
744        let language = tree_sitter_rust::LANGUAGE.into();
745
746        let result = parse_safe(&language, content, None);
747        assert!(result.is_ok());
748    }
749
750    #[test]
751    #[allow(clippy::assertions_on_constants)] // These assertions serve as documentation
752    fn test_constants_sanity() {
753        // Verify constant relationships
754        assert!(MIN_MAX_SIZE < DEFAULT_MAX_SIZE);
755        assert!(DEFAULT_MAX_SIZE < MAX_MAX_SIZE);
756        assert!(MIN_TIMEOUT_MICROS < DEFAULT_TIMEOUT_MICROS);
757        assert!(DEFAULT_TIMEOUT_MICROS < MAX_TIMEOUT_MICROS);
758
759        // Verify human-friendly values
760        assert_eq!(MIN_MAX_SIZE, 1024 * 1024); // 1 MiB
761        assert_eq!(DEFAULT_MAX_SIZE, 10 * 1024 * 1024); // 10 MiB
762        assert_eq!(MAX_MAX_SIZE, 32 * 1024 * 1024); // 32 MiB
763        assert_eq!(MIN_TIMEOUT_MICROS, 100_000); // 100ms
764        assert_eq!(DEFAULT_TIMEOUT_MICROS, 2_000_000); // 2s
765        assert_eq!(MAX_TIMEOUT_MICROS, 5_000_000); // 5s
766    }
767
768    #[test]
769    fn test_termination_reason_enum() {
770        // Test that enum variants are distinct
771        assert_ne!(TerminationReason::None, TerminationReason::Cancelled);
772        assert_ne!(TerminationReason::None, TerminationReason::TimedOut);
773        assert_ne!(TerminationReason::Cancelled, TerminationReason::TimedOut);
774    }
775
776    /// Test fail-closed behavior: timeout must return error even if tree was partially produced.
777    ///
778    /// This test verifies the security-critical fail-closed behavior:
779    /// When a timeout is triggered during parsing, we MUST return `ParseTimedOut` error
780    /// regardless of whether tree-sitter produced a partial tree.
781    ///
782    /// The fix for this was: check `termination_reason` BEFORE checking if a tree exists,
783    /// rather than only checking `termination_reason` when `tree` is `None`.
784    #[test]
785    fn test_timeout_returns_error_fail_closed() {
786        // Use minimum timeout (100ms) with code that might trigger timeout
787        let config = SafeParserConfig::new().with_timeout_micros(MIN_TIMEOUT_MICROS);
788        let parser = SafeParser::new(config);
789
790        // Complex-ish code that might take some time to parse
791        // Even if it parses faster than 100ms, this test still validates the happy path.
792        // The key security guarantee is that IF the timeout triggers, we fail.
793        let content = br#"
794            fn complex_function() {
795                let x = vec![1, 2, 3, 4, 5];
796                for i in x.iter() {
797                    if *i > 3 {
798                        println!("{}", i);
799                    }
800                }
801            }
802        "#;
803
804        let language = tree_sitter_rust::LANGUAGE.into();
805        let result = parser.parse(&language, content, None);
806
807        // Result should be either:
808        // - Ok(tree) if parsing completed within timeout
809        // - Err(ParseTimedOut) if timeout was triggered
810        // The key is: it must NEVER return Ok(partial_tree) after timeout
811        match result {
812            Ok(_tree) => {
813                // Parsing completed within timeout - that's fine
814                // This test primarily documents the fail-closed requirement
815            }
816            Err(ParseError::ParseTimedOut { timeout_micros, .. }) => {
817                // Timeout triggered - verify we got the error, not a partial tree
818                assert_eq!(timeout_micros, MIN_TIMEOUT_MICROS);
819            }
820            Err(ParseError::TreeSitterFailed) => {
821                // Callback compatibility issue - acceptable
822            }
823            Err(e) => {
824                panic!("Unexpected error type: {e:?}");
825            }
826        }
827    }
828
829    /// Test fail-closed behavior with cancellation.
830    ///
831    /// This test verifies that when cancellation is triggered DURING parsing,
832    /// we return `ParseCancelled` even if a partial tree was produced.
833    #[test]
834    fn test_cancellation_during_parse_fail_closed() {
835        use std::thread;
836        use std::time::Duration;
837
838        let flag = CancellationFlag::new();
839        let flag_clone = flag.clone();
840
841        // Use short timeout to give cancellation time to trigger
842        let config = SafeParserConfig::new().with_timeout_micros(MIN_TIMEOUT_MICROS);
843        let parser = SafeParser::new(config).with_cancellation_flag(flag);
844
845        // Spawn thread that cancels after a tiny delay
846        let handle = thread::spawn(move || {
847            thread::sleep(Duration::from_micros(10));
848            flag_clone.cancel();
849        });
850
851        // Moderately complex code
852        let content = br"
853            fn foo() { let x = 1; }
854            fn bar() { let y = 2; }
855            fn baz() { let z = 3; }
856        ";
857
858        let language = tree_sitter_rust::LANGUAGE.into();
859        let result = parser.parse(&language, content, None);
860
861        handle.join().unwrap();
862
863        // Result can be:
864        // - Ok: parsed before cancel took effect
865        // - Err(ParseCancelled): cancel triggered during parse
866        // - Err(ParseTimedOut): timeout triggered
867        // - Err(TreeSitterFailed): callback compatibility
868        // Key: NEVER Ok(partial_tree) after cancellation was triggered
869        match result {
870            Ok(_)
871            | Err(
872                ParseError::ParseCancelled { .. }
873                | ParseError::ParseTimedOut { .. }
874                | ParseError::TreeSitterFailed,
875            ) => {
876                // All acceptable outcomes - the key is fail-closed behavior
877            }
878            Err(e) => {
879                panic!("Unexpected error type: {e:?}");
880            }
881        }
882    }
883
884    // ========================================================================
885    // DETERMINISTIC FAIL-CLOSED TESTS
886    // These tests call finalize_parse_result directly with controlled inputs
887    // to verify fail-closed behavior without depending on timing or parsing.
888    // ========================================================================
889
890    /// Helper to create a valid tree for testing `finalize_parse_result`.
891    fn create_test_tree() -> Tree {
892        let mut parser = tree_sitter::Parser::new();
893        parser
894            .set_language(&tree_sitter_rust::LANGUAGE.into())
895            .unwrap();
896        parser.parse(b"fn main() {}", None).unwrap()
897    }
898
899    /// DETERMINISTIC: Timeout + Some(tree) must return `ParseTimedOut` error.
900    ///
901    /// This is the critical fail-closed test. Even if tree-sitter produces
902    /// a partial tree after timeout, we MUST return an error.
903    #[test]
904    fn test_finalize_timeout_with_tree_returns_error() {
905        let tree = create_test_tree();
906        let result =
907            finalize_parse_result(TerminationReason::TimedOut, Some(tree), None, 2_000_000);
908
909        match result {
910            Err(ParseError::ParseTimedOut {
911                timeout_micros,
912                file,
913            }) => {
914                assert_eq!(timeout_micros, 2_000_000);
915                assert!(file.is_none());
916            }
917            _ => panic!("Expected ParseTimedOut, got {result:?}"),
918        }
919    }
920
921    /// DETERMINISTIC: Cancellation + Some(tree) must return `ParseCancelled` error.
922    ///
923    /// Same as timeout case: even with a tree, cancellation returns error.
924    #[test]
925    fn test_finalize_cancelled_with_tree_returns_error() {
926        let tree = create_test_tree();
927        let result =
928            finalize_parse_result(TerminationReason::Cancelled, Some(tree), None, 2_000_000);
929
930        match result {
931            Err(ParseError::ParseCancelled { reason, file }) => {
932                assert!(reason.contains("cancelled"));
933                assert!(file.is_none());
934            }
935            _ => panic!("Expected ParseCancelled, got {result:?}"),
936        }
937    }
938
939    /// DETERMINISTIC: Timeout + None must return `ParseTimedOut` error.
940    #[test]
941    fn test_finalize_timeout_without_tree_returns_error() {
942        let result = finalize_parse_result(TerminationReason::TimedOut, None, None, 2_000_000);
943
944        match result {
945            Err(ParseError::ParseTimedOut { .. }) => {}
946            _ => panic!("Expected ParseTimedOut, got {result:?}"),
947        }
948    }
949
950    /// DETERMINISTIC: Cancellation + None must return `ParseCancelled` error.
951    #[test]
952    fn test_finalize_cancelled_without_tree_returns_error() {
953        let result = finalize_parse_result(TerminationReason::Cancelled, None, None, 2_000_000);
954
955        match result {
956            Err(ParseError::ParseCancelled { .. }) => {}
957            _ => panic!("Expected ParseCancelled, got {result:?}"),
958        }
959    }
960
961    /// DETERMINISTIC: No termination + Some(tree) returns Ok(tree).
962    #[test]
963    fn test_finalize_success_with_tree() {
964        let tree = create_test_tree();
965        let result = finalize_parse_result(TerminationReason::None, Some(tree), None, 2_000_000);
966
967        assert!(result.is_ok());
968        assert_eq!(result.unwrap().root_node().kind(), "source_file");
969    }
970
971    /// DETERMINISTIC: No termination + None returns `TreeSitterFailed`.
972    #[test]
973    fn test_finalize_failure_without_tree() {
974        let result = finalize_parse_result(TerminationReason::None, None, None, 2_000_000);
975
976        match result {
977            Err(ParseError::TreeSitterFailed) => {}
978            _ => panic!("Expected TreeSitterFailed, got {result:?}"),
979        }
980    }
981
982    /// DETERMINISTIC: Verify file path is included in timeout error.
983    #[test]
984    fn test_finalize_timeout_includes_file_path() {
985        let tree = create_test_tree();
986        let file_path = Path::new("/path/to/test.rs");
987        let result = finalize_parse_result(
988            TerminationReason::TimedOut,
989            Some(tree),
990            Some(file_path),
991            1_500_000,
992        );
993
994        match result {
995            Err(ParseError::ParseTimedOut {
996                timeout_micros,
997                file,
998            }) => {
999                assert_eq!(timeout_micros, 1_500_000);
1000                assert_eq!(file, Some(PathBuf::from("/path/to/test.rs")));
1001            }
1002            _ => panic!("Expected ParseTimedOut with file path, got {result:?}"),
1003        }
1004    }
1005
1006    /// DETERMINISTIC: Verify file path is included in cancellation error.
1007    #[test]
1008    fn test_finalize_cancelled_includes_file_path() {
1009        let tree = create_test_tree();
1010        let file_path = Path::new("/some/code.rs");
1011        let result = finalize_parse_result(
1012            TerminationReason::Cancelled,
1013            Some(tree),
1014            Some(file_path),
1015            2_000_000,
1016        );
1017
1018        match result {
1019            Err(ParseError::ParseCancelled { file, .. }) => {
1020                assert_eq!(file, Some(PathBuf::from("/some/code.rs")));
1021            }
1022            _ => panic!("Expected ParseCancelled with file path, got {result:?}"),
1023        }
1024    }
1025}