sqry_core/plugin/safe_parse.rs
1//! Safe parsing utilities with resource limits.
2//!
3//! This module provides a centralized, secure parser utility that enforces
4//! input size limits, parse timeouts, and supports external cancellation.
5//! All language plugins should use `SafeParser` to prevent OOM vulnerabilities
6//! from pathological inputs.
7//!
8//! # Security Background
9//!
10//! Tree-sitter parsers can consume unbounded memory when encountering malformed
11//! input that triggers exponential backtracking in error recovery. A 103-byte
12//! input can amplify to 2GB+ memory consumption (~20 million× amplification).
13//!
14//! # Usage
15//!
16//! ```ignore
17//! use sqry_core::plugin::safe_parse::{SafeParser, SafeParserConfig};
18//!
19//! let config = SafeParserConfig::default();
20//! let parser = SafeParser::new(config);
21//!
22//! let result = parser.parse(&language, content, Some(file_path));
23//! match result {
24//! Ok(tree) => { /* use tree */ }
25//! Err(ParseError::InputTooLarge { size, max, .. }) => {
26//! log::warn!("File too large: {} bytes > {} limit", size, max);
27//! }
28//! Err(ParseError::ParseTimedOut { timeout_micros, .. }) => {
29//! log::warn!("Parse timed out after {} ms", timeout_micros / 1000);
30//! }
31//! Err(e) => { /* handle other errors */ }
32//! }
33//! ```
34
35use std::ops::ControlFlow;
36use std::path::Path;
37use std::sync::Arc;
38use std::sync::atomic::{AtomicBool, Ordering};
39use std::time::Instant;
40use tree_sitter::{Language, ParseOptions, ParseState, Parser, Tree};
41
42use super::error::ParseError;
43
/// Bytes per mebibyte; used to spell out the size limits below.
const MIB: usize = 1024 * 1024;

/// Microseconds per millisecond; used to spell out the timeouts below.
const MICROS_PER_MILLI: u64 = 1_000;

/// Input size limit applied when none is configured: 10 MiB.
///
/// Large enough for most legitimate source files while still bounding the
/// memory a single parse can be asked to handle. Generated or minified code
/// may exceed it, in which case the limit has to be raised via configuration.
pub const DEFAULT_MAX_SIZE: usize = 10 * MIB;

/// Lower bound for a configured size limit: 1 MiB.
///
/// Configured limits below this value are raised to it so that basic
/// functionality is preserved.
pub const MIN_MAX_SIZE: usize = MIB;

/// Upper bound for a configured size limit: 32 MiB.
///
/// Configured limits above this value are lowered to it to prevent excessive
/// memory usage from extremely large files.
pub const MAX_MAX_SIZE: usize = 32 * MIB;

/// Parse timeout applied when none is configured: 2 seconds
/// (2,000,000 microseconds).
///
/// Stops runaway parsing on pathological inputs that could cause exponential
/// backtracking. Most legitimate files parse in <100ms.
pub const DEFAULT_TIMEOUT_MICROS: u64 = 2_000 * MICROS_PER_MILLI;

/// Lower bound for a configured timeout: 100ms (100,000 microseconds).
///
/// Shorter timeouts are raised to this value because they would cause false
/// positives on normal files.
pub const MIN_TIMEOUT_MICROS: u64 = 100 * MICROS_PER_MILLI;

/// Upper bound for a configured timeout: 5 seconds (5,000,000 microseconds).
///
/// Longer timeouts are lowered to this value so pathological inputs are
/// caught within reasonable time.
pub const MAX_TIMEOUT_MICROS: u64 = 5_000 * MICROS_PER_MILLI;
80
81/// Configuration for `SafeParser` with bounded limits.
82///
83/// All limits are bounded to prevent users from disabling security protections.
84/// Values outside the allowed range are clamped to the nearest bound.
85///
86/// # Bounds
87///
88/// - `max_input_size`: [1 MiB, 32 MiB]
89/// - `timeout_micros`: [100,000 µs, 5,000,000 µs] (100ms to 5s)
90///
91/// # Example
92///
93/// ```
94/// use sqry_core::plugin::safe_parse::SafeParserConfig;
95///
96/// // Use defaults
97/// let config = SafeParserConfig::default();
98/// assert_eq!(config.max_input_size(), 10 * 1024 * 1024);
99/// assert_eq!(config.timeout_micros(), 2_000_000);
100///
101/// // Custom configuration (values are clamped to bounds)
102/// let config = SafeParserConfig::new()
103/// .with_max_input_size(20 * 1024 * 1024)
104/// .with_timeout_micros(3_000_000);
105/// ```
106#[derive(Debug, Clone)]
107pub struct SafeParserConfig {
108 max_input_size: usize,
109 timeout_micros: u64,
110}
111
112impl Default for SafeParserConfig {
113 fn default() -> Self {
114 Self {
115 max_input_size: DEFAULT_MAX_SIZE,
116 timeout_micros: DEFAULT_TIMEOUT_MICROS,
117 }
118 }
119}
120
121impl SafeParserConfig {
122 /// Create a new configuration with default values.
123 #[must_use]
124 pub fn new() -> Self {
125 Self::default()
126 }
127
128 /// Set maximum input size in bytes.
129 ///
130 /// Value is clamped to [1 MiB, 32 MiB].
131 #[must_use]
132 pub fn with_max_input_size(mut self, size: usize) -> Self {
133 self.max_input_size = size.clamp(MIN_MAX_SIZE, MAX_MAX_SIZE);
134 self
135 }
136
137 /// Set parse timeout in microseconds.
138 ///
139 /// Value is clamped to [100,000 µs, 5,000,000 µs].
140 #[must_use]
141 pub fn with_timeout_micros(mut self, timeout: u64) -> Self {
142 self.timeout_micros = timeout.clamp(MIN_TIMEOUT_MICROS, MAX_TIMEOUT_MICROS);
143 self
144 }
145
146 /// Get current maximum input size.
147 #[must_use]
148 pub fn max_input_size(&self) -> usize {
149 self.max_input_size
150 }
151
152 /// Get current timeout in microseconds.
153 #[must_use]
154 pub fn timeout_micros(&self) -> u64 {
155 self.timeout_micros
156 }
157}
158
/// Shared, thread-safe switch used to abort a parse from the outside.
///
/// The flag wraps an `Arc<AtomicBool>`, so every clone observes and controls
/// the same underlying state. The indexer can set it to proactively cancel
/// parsing when needed (e.g., on shutdown, file change, or resource pressure).
///
/// # Example
///
/// ```
/// use sqry_core::plugin::safe_parse::CancellationFlag;
///
/// let flag = CancellationFlag::new();
///
/// // Check if cancelled
/// assert!(!flag.is_cancelled());
///
/// // Signal cancellation
/// flag.cancel();
/// assert!(flag.is_cancelled());
///
/// // Reset for next file
/// flag.reset();
/// assert!(!flag.is_cancelled());
/// ```
#[derive(Debug, Clone, Default)]
pub struct CancellationFlag {
    /// `true` once cancellation has been requested; shared between clones.
    cancelled: Arc<AtomicBool>,
}

impl CancellationFlag {
    /// Create a flag in the "not cancelled" state.
    #[must_use]
    pub fn new() -> Self {
        // The derived `Default` yields `Arc::new(AtomicBool::new(false))`.
        Self::default()
    }

    /// Report whether cancellation has been requested.
    #[must_use]
    pub fn is_cancelled(&self) -> bool {
        self.cancelled.load(Ordering::Relaxed)
    }

    /// Request cancellation; visible through every clone of this flag.
    pub fn cancel(&self) {
        self.set(true);
    }

    /// Clear a previous cancellation request.
    ///
    /// Call this between files so an old request does not leak into the
    /// next parse.
    pub fn reset(&self) {
        self.set(false);
    }

    /// Store `value` into the shared flag with relaxed ordering.
    fn set(&self, value: bool) {
        self.cancelled.store(value, Ordering::Relaxed);
    }
}
215
/// Why a parse attempt stopped, as determined by `SafeParser::parse`.
///
/// Module-private: it only carries the decision from the parse routine to
/// `finalize_parse_result`, which maps it onto a `ParseError` or a tree.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TerminationReason {
    /// Neither cancellation nor timeout applied; the parse either completed
    /// or failed for some other reason.
    None,
    /// The cancellation flag was set.
    Cancelled,
    /// The configured timeout was exceeded.
    TimedOut,
}
226
227/// Pure helper function for finalizing parse results with fail-closed behavior.
228///
229/// **SECURITY CRITICAL**: This function implements fail-closed semantics.
230/// If a termination reason was triggered (timeout or cancellation), we return
231/// an error regardless of whether tree-sitter produced a partial tree.
232///
233/// This function is extracted for deterministic testability. The parse outcome
234/// decision is pure (depends only on inputs) and can be tested without
235/// depending on timing or actual parsing.
236///
237/// # Arguments
238///
239/// * `termination_reason` - Why parsing terminated (`None`, `Cancelled`, `TimedOut`)
240/// * `tree` - The tree-sitter result (`Some` = tree produced, `None` = no tree)
241/// * `file` - Optional file path for error context
242/// * `timeout_micros` - Timeout value for error reporting
243///
244/// # Returns
245///
246/// * `Ok(Tree)` only if `termination_reason` is `None` AND tree is `Some`
247/// * `Err(ParseCancelled)` if `termination_reason` is `Cancelled` (regardless of tree)
248/// * `Err(ParseTimedOut)` if `termination_reason` is `TimedOut` (regardless of tree)
249/// * `Err(TreeSitterFailed)` if `termination_reason` is `None` but tree is `None`
250fn finalize_parse_result(
251 termination_reason: TerminationReason,
252 tree: Option<Tree>,
253 file: Option<&Path>,
254 timeout_micros: u64,
255) -> Result<Tree, ParseError> {
256 // SECURITY: Check termination reason FIRST, before checking if tree exists.
257 // Tree-sitter may return a partial tree even after timeout/cancellation.
258 // We must fail-closed: if timeout or cancellation was triggered, return an error
259 // regardless of whether a partial tree was produced.
260 match termination_reason {
261 TerminationReason::Cancelled => {
262 log::warn!(
263 "Parse cancelled{}",
264 file.map(|f| format!(" (file: {})", f.display()))
265 .unwrap_or_default()
266 );
267 return Err(ParseError::ParseCancelled {
268 reason: "cancelled during parsing".to_string(),
269 file: file.map(Path::to_path_buf),
270 });
271 }
272 TerminationReason::TimedOut => {
273 log::warn!(
274 "Parse timed out after {} ms{}",
275 timeout_micros / 1000,
276 file.map(|f| format!(" (file: {})", f.display()))
277 .unwrap_or_default()
278 );
279 return Err(ParseError::ParseTimedOut {
280 timeout_micros,
281 file: file.map(Path::to_path_buf),
282 });
283 }
284 TerminationReason::None => {
285 // No termination requested, proceed to check tree
286 }
287 }
288
289 // If no termination was requested, check if tree-sitter produced a tree
290 if let Some(t) = tree {
291 Ok(t)
292 } else {
293 log::warn!(
294 "Parse failed{}",
295 file.map(|f| format!(" (file: {})", f.display()))
296 .unwrap_or_default()
297 );
298 Err(ParseError::TreeSitterFailed)
299 }
300}
301
302/// Safe parser with resource limits and cancellation support.
303///
304/// `SafeParser` wraps tree-sitter parsing with:
305/// - Input size validation (prevents unbounded allocation)
306/// - Parse timeout (prevents exponential backtracking)
307/// - External cancellation (allows proactive abort)
308///
309/// All language plugins should use this utility instead of creating
310/// parsers directly to ensure consistent security policy.
311///
312/// # Thread Safety
313///
314/// `SafeParser` is `Send + Sync` but the underlying tree-sitter `Parser`
315/// is created per-call. This is intentional to avoid thread-safety issues
316/// with tree-sitter's internal state.
317///
318/// # Example
319///
320/// ```ignore
321/// use sqry_core::plugin::safe_parse::{SafeParser, SafeParserConfig};
322/// use tree_sitter_rust::LANGUAGE;
323///
324/// let parser = SafeParser::new(SafeParserConfig::default());
325/// let content = b"fn main() {}";
326///
327/// match parser.parse(&LANGUAGE.into(), content, None) {
328/// Ok(tree) => println!("Parsed {} nodes", tree.root_node().child_count()),
329/// Err(e) => eprintln!("Parse failed: {}", e),
330/// }
331/// ```
332#[derive(Debug, Clone)]
333pub struct SafeParser {
334 config: SafeParserConfig,
335 cancellation_flag: Option<CancellationFlag>,
336}
337
338impl Default for SafeParser {
339 fn default() -> Self {
340 Self::new(SafeParserConfig::default())
341 }
342}
343
344impl SafeParser {
345 /// Create a new safe parser with the given configuration.
346 #[must_use]
347 pub fn new(config: SafeParserConfig) -> Self {
348 Self {
349 config,
350 cancellation_flag: None,
351 }
352 }
353
354 /// Create a safe parser with default configuration.
355 #[must_use]
356 pub fn with_defaults() -> Self {
357 Self::default()
358 }
359
360 /// Set a cancellation flag for external abort signaling.
361 #[must_use]
362 pub fn with_cancellation_flag(mut self, flag: CancellationFlag) -> Self {
363 self.cancellation_flag = Some(flag);
364 self
365 }
366
367 /// Get the current configuration.
368 #[must_use]
369 pub fn config(&self) -> &SafeParserConfig {
370 &self.config
371 }
372
373 /// Parse source code with resource limits.
374 ///
375 /// # Arguments
376 ///
377 /// * `language` - Tree-sitter language to use
378 /// * `content` - Source code as bytes (UTF-8 encoded)
379 /// * `file` - Optional file path for error context
380 ///
381 /// # Returns
382 ///
383 /// Parsed tree-sitter AST on success.
384 ///
385 /// # Errors
386 ///
387 /// - `ParseError::InputTooLarge` - Input exceeds size limit
388 /// - `ParseError::ParseTimedOut` - Parsing exceeded timeout
389 /// - `ParseError::ParseCancelled` - Parsing was cancelled via flag
390 /// - `ParseError::LanguageSetFailed` - Failed to configure parser
391 /// - `ParseError::TreeSitterFailed` - Tree-sitter returned no tree
392 ///
393 /// # Performance
394 ///
395 /// Creates a new `Parser` per call. This is intentional:
396 /// - Avoids thread-safety issues with tree-sitter's state
397 /// - Parser creation is cheap (~1µs)
398 /// - Timeout/cancellation state is per-parse
399 ///
400 /// # Implementation Note
401 ///
402 /// Uses the direct `parser.parse()` API with `set_timeout_micros()` instead of
403 /// `parse_with_options()` with a chunked callback. The callback-based API has
404 /// compatibility issues with some grammars (e.g., tree-sitter-groovy on multi-
405 /// function files). The direct API works universally.
406 ///
407 /// While `set_timeout_micros` is deprecated in tree-sitter 0.25, it remains
408 /// functional and provides better grammar compatibility than the callback approach.
409 ///
410 /// # Cancellation Limitation
411 ///
412 /// Mid-parse cancellation is not supported with the direct API. Cancellation is
413 /// checked before and after parsing, but not during. For most source files (which
414 /// parse in <100ms), this is acceptable. For very large files, the timeout provides
415 /// protection.
416 ///
417 // DEPRECATION: We use `set_timeout_micros` because the recommended replacement,
418 // `parse_with_options` (with a callback), has proven to be incompatible with
419 // certain grammars (e.g., tree-sitter-groovy). This approach ensures universal
420 // grammar compatibility.
421 pub fn parse(
422 &self,
423 language: &Language,
424 content: &[u8],
425 file: Option<&Path>,
426 ) -> Result<Tree, ParseError> {
427 // Check cancellation before starting
428 if let Some(ref flag) = self.cancellation_flag
429 && flag.is_cancelled()
430 {
431 return Err(ParseError::ParseCancelled {
432 reason: "cancelled before parse started".to_string(),
433 file: file.map(Path::to_path_buf),
434 });
435 }
436
437 // Check input size limit
438 if content.len() > self.config.max_input_size {
439 log::warn!(
440 "Input too large: {} bytes exceeds {} limit{}",
441 content.len(),
442 self.config.max_input_size,
443 file.map(|f| format!(" (file: {})", f.display()))
444 .unwrap_or_default()
445 );
446 return Err(ParseError::InputTooLarge {
447 size: content.len(),
448 max: self.config.max_input_size,
449 file: file.map(Path::to_path_buf),
450 });
451 }
452
453 // Create and configure parser
454 let mut parser = Parser::new();
455 parser
456 .set_language(language)
457 .map_err(|e| ParseError::LanguageSetFailed(e.to_string()))?;
458
459 // Track start time for timeout enforcement
460 let start_time = Instant::now();
461 let timeout_micros = self.config.timeout_micros;
462
463 // Clone the cancellation flag for use inside the progress callback.
464 // The underlying AtomicBool is shared via Arc, so this is cheap.
465 let cancellation_flag = self.cancellation_flag.clone();
466
467 // Set up timeout + cancellation via progress callback (tree-sitter 0.26+).
468 // set_timeout_micros was removed in tree-sitter 0.26; the progress_callback
469 // is now the canonical way to abort a long-running parse.
470 let mut progress_fn = move |_: &ParseState| -> ControlFlow<()> {
471 if let Some(ref flag) = cancellation_flag
472 && flag.is_cancelled()
473 {
474 return ControlFlow::Break(());
475 }
476 #[allow(clippy::cast_possible_truncation)]
477 // u64 holds 584+ years of µs; max timeout is 5s
478 if start_time.elapsed().as_micros() as u64 > timeout_micros {
479 ControlFlow::Break(())
480 } else {
481 ControlFlow::Continue(())
482 }
483 };
484 let options = ParseOptions::new().progress_callback(&mut progress_fn);
485
486 // Parse with timeout/cancellation enforcement via progress callback.
487 let tree = parser.parse_with_options(
488 &mut |i, _| content.get(i..).unwrap_or_default(),
489 None,
490 Some(options),
491 );
492
493 // Determine termination reason (fail-closed semantics)
494 // SECURITY: Check timeout FIRST using elapsed time, regardless of whether tree-sitter
495 // produced a tree. Tree-sitter may return partial trees even after timeout.
496 // We must fail-closed: if we exceeded timeout, return an error.
497 #[allow(clippy::cast_possible_truncation)] // u64 holds 584+ years of µs; timeout max is 5s
498 let elapsed_micros = start_time.elapsed().as_micros() as u64;
499 let termination_reason = if let Some(ref flag) = self.cancellation_flag
500 && flag.is_cancelled()
501 {
502 // Cancellation was requested (possibly during parse)
503 TerminationReason::Cancelled
504 } else if elapsed_micros > self.config.timeout_micros {
505 // Timeout occurred - fail-closed regardless of whether tree was produced
506 TerminationReason::TimedOut
507 } else if tree.is_none() && elapsed_micros >= self.config.timeout_micros {
508 // Edge case: tree-sitter aborted exactly at timeout boundary (returns None)
509 // Treat this as timeout rather than TreeSitterFailed for accurate telemetry
510 TerminationReason::TimedOut
511 } else {
512 TerminationReason::None
513 };
514
515 // Delegate to pure helper for fail-closed result handling
516 finalize_parse_result(termination_reason, tree, file, self.config.timeout_micros)
517 }
518
519 /// Parse source code with file path context.
520 ///
521 /// Convenience method that always includes file path in errors.
522 ///
523 /// # Errors
524 ///
525 /// Same as [`parse`](Self::parse).
526 pub fn parse_file(
527 &self,
528 language: &Language,
529 content: &[u8],
530 file: &Path,
531 ) -> Result<Tree, ParseError> {
532 self.parse(language, content, Some(file))
533 }
534
535 /// Log a summary of the current configuration.
536 ///
537 /// Call this once at startup to record active limits for incident triage.
538 #[allow(clippy::cast_precision_loss)] // max_input_size <= 32 MiB, well under f64 precision limit
539 pub fn log_config(&self) {
540 log::info!(
541 "SafeParser configured: max_size={} bytes ({:.1} MiB), timeout={} ms",
542 self.config.max_input_size,
543 self.config.max_input_size as f64 / (1024.0 * 1024.0),
544 self.config.timeout_micros / 1000
545 );
546 }
547}
548
549/// Parse content using the default safe parser configuration.
550///
551/// This is a convenience function for simple cases. For production use,
552/// prefer creating a `SafeParser` instance with explicit configuration.
553///
554/// # Errors
555///
556/// Same as [`SafeParser::parse`].
557pub fn parse_safe(
558 language: &Language,
559 content: &[u8],
560 file: Option<&Path>,
561) -> Result<Tree, ParseError> {
562 SafeParser::default().parse(language, content, file)
563}
564
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Defaults come straight from the module constants.
    #[test]
    fn test_config_defaults() {
        let config = SafeParserConfig::default();
        assert_eq!(config.max_input_size(), DEFAULT_MAX_SIZE);
        assert_eq!(config.timeout_micros(), DEFAULT_TIMEOUT_MICROS);
    }

    /// In-range values pass through the builder unchanged.
    #[test]
    fn test_config_builder() {
        let config = SafeParserConfig::new()
            .with_max_input_size(20 * 1024 * 1024)
            .with_timeout_micros(3_000_000);

        assert_eq!(config.max_input_size(), 20 * 1024 * 1024);
        assert_eq!(config.timeout_micros(), 3_000_000);
    }

    #[test]
    fn test_config_clamping_min() {
        // Below minimum should clamp up
        let config = SafeParserConfig::new()
            .with_max_input_size(100) // Way below 1 MiB
            .with_timeout_micros(1000); // Way below 100ms

        assert_eq!(config.max_input_size(), MIN_MAX_SIZE);
        assert_eq!(config.timeout_micros(), MIN_TIMEOUT_MICROS);
    }

    #[test]
    fn test_config_clamping_max() {
        // Above maximum should clamp down
        let config = SafeParserConfig::new()
            .with_max_input_size(100 * 1024 * 1024) // 100 MiB > 32 MiB max
            .with_timeout_micros(10_000_000); // 10s > 5s max

        assert_eq!(config.max_input_size(), MAX_MAX_SIZE);
        assert_eq!(config.timeout_micros(), MAX_TIMEOUT_MICROS);
    }

    /// cancel/reset round trip on a single flag.
    #[test]
    fn test_cancellation_flag() {
        let flag = CancellationFlag::new();

        assert!(!flag.is_cancelled());

        flag.cancel();
        assert!(flag.is_cancelled());

        flag.reset();
        assert!(!flag.is_cancelled());
    }

    #[test]
    fn test_cancellation_flag_clone() {
        let flag1 = CancellationFlag::new();
        let flag2 = flag1.clone();

        flag1.cancel();
        assert!(flag2.is_cancelled()); // Clone shares the same Arc
    }

    #[test]
    fn test_safe_parser_creation() {
        let parser = SafeParser::with_defaults();
        assert_eq!(parser.config().max_input_size(), DEFAULT_MAX_SIZE);
        assert_eq!(parser.config().timeout_micros(), DEFAULT_TIMEOUT_MICROS);
    }

    #[test]
    fn test_safe_parser_with_config() {
        let config = SafeParserConfig::new().with_max_input_size(5 * 1024 * 1024);
        let parser = SafeParser::new(config);

        assert_eq!(parser.config().max_input_size(), 5 * 1024 * 1024);
    }

    /// The flag attached to the parser must share state with the caller's
    /// handle; otherwise external cancellation could never reach the parser.
    #[test]
    fn test_safe_parser_with_cancellation() {
        let flag = CancellationFlag::new();
        let parser = SafeParser::with_defaults().with_cancellation_flag(flag.clone());

        // Parser should have the flag, initially not cancelled
        let attached = parser
            .cancellation_flag
            .as_ref()
            .expect("cancellation flag should be attached");
        assert!(!attached.is_cancelled());

        // Cancelling through the caller's handle is visible via the parser's copy
        flag.cancel();
        assert!(attached.is_cancelled());
    }

    #[test]
    fn test_input_too_large_error() {
        // Create parser with tiny limit for testing
        let config = SafeParserConfig::new().with_max_input_size(MIN_MAX_SIZE);
        let parser = SafeParser::new(config);

        // Content larger than 1 MiB
        let large_content = vec![b'x'; MIN_MAX_SIZE + 1];

        // Use a dummy language (we'll hit size check before parsing)
        let language = tree_sitter_rust::LANGUAGE.into();
        let result = parser.parse(&language, &large_content, None);

        match result {
            Err(ParseError::InputTooLarge { size, max, file }) => {
                assert_eq!(size, MIN_MAX_SIZE + 1);
                assert_eq!(max, MIN_MAX_SIZE);
                assert!(file.is_none());
            }
            _ => panic!("Expected InputTooLarge error"),
        }
    }

    #[test]
    fn test_input_too_large_with_file() {
        let config = SafeParserConfig::new().with_max_input_size(MIN_MAX_SIZE);
        let parser = SafeParser::new(config);

        let large_content = vec![b'x'; MIN_MAX_SIZE + 1];
        let file_path = PathBuf::from("/path/to/large.rs");
        let language = tree_sitter_rust::LANGUAGE.into();

        let result = parser.parse_file(&language, &large_content, &file_path);

        match result {
            Err(ParseError::InputTooLarge { file, .. }) => {
                assert_eq!(file, Some(file_path));
            }
            _ => panic!("Expected InputTooLarge error with file path"),
        }
    }

    #[test]
    fn test_cancelled_before_parse() {
        let flag = CancellationFlag::new();
        flag.cancel(); // Cancel before parsing

        let parser = SafeParser::with_defaults().with_cancellation_flag(flag);

        let content = b"fn main() {}";
        let language = tree_sitter_rust::LANGUAGE.into();
        let result = parser.parse(&language, content, None);

        match result {
            Err(ParseError::ParseCancelled { reason, .. }) => {
                assert!(reason.contains("before parse started"));
            }
            _ => panic!("Expected ParseCancelled error"),
        }
    }

    /// DETERMINISTIC: a pre-cancelled parse via `parse_file` reports the path.
    #[test]
    fn test_cancelled_before_parse_includes_file_path() {
        let flag = CancellationFlag::new();
        flag.cancel();

        let parser = SafeParser::with_defaults().with_cancellation_flag(flag);

        let content = b"fn main() {}";
        let file_path = PathBuf::from("cancelled.rs");
        let language = tree_sitter_rust::LANGUAGE.into();
        let result = parser.parse_file(&language, content, &file_path);

        match result {
            Err(ParseError::ParseCancelled { reason, file }) => {
                assert!(reason.contains("before parse started"));
                assert_eq!(file, Some(file_path));
            }
            _ => panic!("Expected ParseCancelled error with file path"),
        }
    }

    #[test]
    fn test_successful_parse() {
        let parser = SafeParser::with_defaults();
        let content = b"fn main() {}";
        let language = tree_sitter_rust::LANGUAGE.into();

        let result = parser.parse(&language, content, None);
        assert!(result.is_ok());

        let tree = result.unwrap();
        // Verify we got a valid tree by checking root node kind
        assert_eq!(tree.root_node().kind(), "source_file");
    }

    #[test]
    fn test_successful_parse_with_file() {
        let parser = SafeParser::with_defaults();
        let content = b"fn main() { let x = 42; }";
        let file_path = PathBuf::from("test.rs");
        let language = tree_sitter_rust::LANGUAGE.into();

        let result = parser.parse_file(&language, content, &file_path);
        assert!(result.is_ok());
    }

    #[test]
    fn test_parse_safe_convenience() {
        let content = b"fn foo() {}";
        let language = tree_sitter_rust::LANGUAGE.into();

        let result = parse_safe(&language, content, None);
        assert!(result.is_ok());
    }

    #[test]
    #[allow(clippy::assertions_on_constants)] // These assertions serve as documentation
    fn test_constants_sanity() {
        // Verify constant relationships
        assert!(MIN_MAX_SIZE < DEFAULT_MAX_SIZE);
        assert!(DEFAULT_MAX_SIZE < MAX_MAX_SIZE);
        assert!(MIN_TIMEOUT_MICROS < DEFAULT_TIMEOUT_MICROS);
        assert!(DEFAULT_TIMEOUT_MICROS < MAX_TIMEOUT_MICROS);

        // Verify human-friendly values
        assert_eq!(MIN_MAX_SIZE, 1024 * 1024); // 1 MiB
        assert_eq!(DEFAULT_MAX_SIZE, 10 * 1024 * 1024); // 10 MiB
        assert_eq!(MAX_MAX_SIZE, 32 * 1024 * 1024); // 32 MiB
        assert_eq!(MIN_TIMEOUT_MICROS, 100_000); // 100ms
        assert_eq!(DEFAULT_TIMEOUT_MICROS, 2_000_000); // 2s
        assert_eq!(MAX_TIMEOUT_MICROS, 5_000_000); // 5s
    }

    #[test]
    fn test_termination_reason_enum() {
        // Test that enum variants are distinct
        assert_ne!(TerminationReason::None, TerminationReason::Cancelled);
        assert_ne!(TerminationReason::None, TerminationReason::TimedOut);
        assert_ne!(TerminationReason::Cancelled, TerminationReason::TimedOut);
    }

    /// Test fail-closed behavior: timeout must return error even if tree was partially produced.
    ///
    /// This test verifies the security-critical fail-closed behavior:
    /// When a timeout is triggered during parsing, we MUST return `ParseTimedOut` error
    /// regardless of whether tree-sitter produced a partial tree.
    ///
    /// The fix for this was: check `termination_reason` BEFORE checking if a tree exists,
    /// rather than only checking `termination_reason` when `tree` is `None`.
    ///
    /// The outcome depends on timing, so several results are accepted here;
    /// the deterministic `test_finalize_*` tests below pin the exact mapping.
    #[test]
    fn test_timeout_returns_error_fail_closed() {
        // Use minimum timeout (100ms) with code that might trigger timeout
        let config = SafeParserConfig::new().with_timeout_micros(MIN_TIMEOUT_MICROS);
        let parser = SafeParser::new(config);

        // Complex-ish code that might take some time to parse
        // Even if it parses faster than 100ms, this test still validates the happy path.
        // The key security guarantee is that IF the timeout triggers, we fail.
        let content = br#"
            fn complex_function() {
                let x = vec![1, 2, 3, 4, 5];
                for i in x.iter() {
                    if *i > 3 {
                        println!("{}", i);
                    }
                }
            }
        "#;

        let language = tree_sitter_rust::LANGUAGE.into();
        let result = parser.parse(&language, content, None);

        // Result should be either:
        // - Ok(tree) if parsing completed within timeout
        // - Err(ParseTimedOut) if timeout was triggered
        // The key is: it must NEVER return Ok(partial_tree) after timeout
        match result {
            Ok(_tree) => {
                // Parsing completed within timeout - that's fine
                // This test primarily documents the fail-closed requirement
            }
            Err(ParseError::ParseTimedOut { timeout_micros, .. }) => {
                // Timeout triggered - verify we got the error, not a partial tree
                assert_eq!(timeout_micros, MIN_TIMEOUT_MICROS);
            }
            Err(ParseError::TreeSitterFailed) => {
                // Callback compatibility issue - acceptable
            }
            Err(e) => {
                panic!("Unexpected error type: {e:?}");
            }
        }
    }

    /// Test fail-closed behavior with cancellation.
    ///
    /// This test verifies that when cancellation is triggered DURING parsing,
    /// we return `ParseCancelled` even if a partial tree was produced.
    ///
    /// The cancelling thread races with the parse, so the outcome is not
    /// deterministic; only the set of acceptable outcomes is checked.
    #[test]
    fn test_cancellation_during_parse_fail_closed() {
        use std::thread;
        use std::time::Duration;

        let flag = CancellationFlag::new();
        let flag_clone = flag.clone();

        // Use the minimum timeout so a stuck parse cannot stall the test for long
        let config = SafeParserConfig::new().with_timeout_micros(MIN_TIMEOUT_MICROS);
        let parser = SafeParser::new(config).with_cancellation_flag(flag);

        // Spawn thread that cancels after a tiny delay
        let handle = thread::spawn(move || {
            thread::sleep(Duration::from_micros(10));
            flag_clone.cancel();
        });

        // Moderately complex code
        let content = br"
            fn foo() { let x = 1; }
            fn bar() { let y = 2; }
            fn baz() { let z = 3; }
        ";

        let language = tree_sitter_rust::LANGUAGE.into();
        let result = parser.parse(&language, content, None);

        handle.join().unwrap();

        // Result can be:
        // - Ok: parsed before cancel took effect
        // - Err(ParseCancelled): cancel triggered during parse
        // - Err(ParseTimedOut): timeout triggered
        // - Err(TreeSitterFailed): callback compatibility
        // Key: NEVER Ok(partial_tree) after cancellation was triggered
        match result {
            Ok(_)
            | Err(
                ParseError::ParseCancelled { .. }
                | ParseError::ParseTimedOut { .. }
                | ParseError::TreeSitterFailed,
            ) => {
                // All acceptable outcomes - the key is fail-closed behavior
            }
            Err(e) => {
                panic!("Unexpected error type: {e:?}");
            }
        }
    }

    // ========================================================================
    // DETERMINISTIC FAIL-CLOSED TESTS
    // These tests call finalize_parse_result directly with controlled inputs
    // to verify fail-closed behavior without depending on timing or parsing.
    // ========================================================================

    /// Helper to create a valid tree for testing `finalize_parse_result`.
    fn create_test_tree() -> Tree {
        let mut parser = tree_sitter::Parser::new();
        parser
            .set_language(&tree_sitter_rust::LANGUAGE.into())
            .unwrap();
        parser.parse(b"fn main() {}", None).unwrap()
    }

    /// DETERMINISTIC: Timeout + Some(tree) must return `ParseTimedOut` error.
    ///
    /// This is the critical fail-closed test. Even if tree-sitter produces
    /// a partial tree after timeout, we MUST return an error.
    #[test]
    fn test_finalize_timeout_with_tree_returns_error() {
        let tree = create_test_tree();
        let result =
            finalize_parse_result(TerminationReason::TimedOut, Some(tree), None, 2_000_000);

        match result {
            Err(ParseError::ParseTimedOut {
                timeout_micros,
                file,
            }) => {
                assert_eq!(timeout_micros, 2_000_000);
                assert!(file.is_none());
            }
            _ => panic!("Expected ParseTimedOut, got {result:?}"),
        }
    }

    /// DETERMINISTIC: Cancellation + Some(tree) must return `ParseCancelled` error.
    ///
    /// Same as timeout case: even with a tree, cancellation returns error.
    #[test]
    fn test_finalize_cancelled_with_tree_returns_error() {
        let tree = create_test_tree();
        let result =
            finalize_parse_result(TerminationReason::Cancelled, Some(tree), None, 2_000_000);

        match result {
            Err(ParseError::ParseCancelled { reason, file }) => {
                assert!(reason.contains("cancelled"));
                assert!(file.is_none());
            }
            _ => panic!("Expected ParseCancelled, got {result:?}"),
        }
    }

    /// DETERMINISTIC: Timeout + None must return `ParseTimedOut` error.
    #[test]
    fn test_finalize_timeout_without_tree_returns_error() {
        let result = finalize_parse_result(TerminationReason::TimedOut, None, None, 2_000_000);

        match result {
            Err(ParseError::ParseTimedOut { .. }) => {}
            _ => panic!("Expected ParseTimedOut, got {result:?}"),
        }
    }

    /// DETERMINISTIC: Cancellation + None must return `ParseCancelled` error.
    #[test]
    fn test_finalize_cancelled_without_tree_returns_error() {
        let result = finalize_parse_result(TerminationReason::Cancelled, None, None, 2_000_000);

        match result {
            Err(ParseError::ParseCancelled { .. }) => {}
            _ => panic!("Expected ParseCancelled, got {result:?}"),
        }
    }

    /// DETERMINISTIC: No termination + Some(tree) returns Ok(tree).
    #[test]
    fn test_finalize_success_with_tree() {
        let tree = create_test_tree();
        let result = finalize_parse_result(TerminationReason::None, Some(tree), None, 2_000_000);

        assert!(result.is_ok());
        assert_eq!(result.unwrap().root_node().kind(), "source_file");
    }

    /// DETERMINISTIC: No termination + None returns `TreeSitterFailed`.
    #[test]
    fn test_finalize_failure_without_tree() {
        let result = finalize_parse_result(TerminationReason::None, None, None, 2_000_000);

        match result {
            Err(ParseError::TreeSitterFailed) => {}
            _ => panic!("Expected TreeSitterFailed, got {result:?}"),
        }
    }

    /// DETERMINISTIC: Verify file path is included in timeout error.
    #[test]
    fn test_finalize_timeout_includes_file_path() {
        let tree = create_test_tree();
        let file_path = Path::new("/path/to/test.rs");
        let result = finalize_parse_result(
            TerminationReason::TimedOut,
            Some(tree),
            Some(file_path),
            1_500_000,
        );

        match result {
            Err(ParseError::ParseTimedOut {
                timeout_micros,
                file,
            }) => {
                assert_eq!(timeout_micros, 1_500_000);
                assert_eq!(file, Some(PathBuf::from("/path/to/test.rs")));
            }
            _ => panic!("Expected ParseTimedOut with file path, got {result:?}"),
        }
    }

    /// DETERMINISTIC: Verify file path is included in cancellation error.
    #[test]
    fn test_finalize_cancelled_includes_file_path() {
        let tree = create_test_tree();
        let file_path = Path::new("/some/code.rs");
        let result = finalize_parse_result(
            TerminationReason::Cancelled,
            Some(tree),
            Some(file_path),
            2_000_000,
        );

        match result {
            Err(ParseError::ParseCancelled { file, .. }) => {
                assert_eq!(file, Some(PathBuf::from("/some/code.rs")));
            }
            _ => panic!("Expected ParseCancelled with file path, got {result:?}"),
        }
    }
}
1025}