ries_rs/
search.rs

1//! Search and matching algorithms
2//!
3//! Finds equations by matching LHS and RHS expressions.
4
5use crate::eval::EvalContext;
6use crate::expr::EvaluatedExpr;
7use crate::pool::{RankingMode, TopKPool};
8use crate::profile::UserConstant;
9use crate::thresholds::{
10    DEGENERATE_DERIVATIVE, DEGENERATE_RANGE_TOLERANCE, DEGENERATE_TEST_THRESHOLD,
11    EXACT_MATCH_TOLERANCE, NEWTON_FINAL_TOLERANCE,
12};
13use std::collections::HashSet;
14use std::time::Duration;
15
16mod db;
17mod newton;
18#[cfg(test)]
19mod tests;
20
21use db::calculate_adaptive_search_radius;
22pub use db::{ComplexityTier, ExprDatabase, TieredExprDatabase};
23#[cfg(test)]
24use newton::newton_raphson;
25use newton::newton_raphson_with_constants;
26
27#[derive(Clone, Copy, Debug)]
28struct SearchTimer {
29    #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
30    start_ms: f64,
31    #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
32    start: std::time::Instant,
33}
34
35impl SearchTimer {
36    #[inline]
37    fn start() -> Self {
38        #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
39        {
40            Self {
41                start_ms: js_sys::Date::now(),
42            }
43        }
44
45        #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
46        {
47            Self {
48                start: std::time::Instant::now(),
49            }
50        }
51    }
52
53    #[inline]
54    fn elapsed(self) -> Duration {
55        #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
56        {
57            let elapsed_ms = (js_sys::Date::now() - self.start_ms).max(0.0);
58            Duration::from_secs_f64(elapsed_ms / 1000.0)
59        }
60
61        #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
62        {
63            self.start.elapsed()
64        }
65    }
66}
67
68/// Statistics collected during search
69#[derive(Clone, Debug, Default)]
70pub struct SearchStats {
71    /// Time spent generating expressions
72    pub gen_time: Duration,
73    /// Time spent searching/matching
74    pub search_time: Duration,
75    /// Number of LHS expressions generated
76    pub lhs_count: usize,
77    /// Number of RHS expressions generated
78    pub rhs_count: usize,
79    /// Number of LHS expressions tested (after filtering)
80    pub lhs_tested: usize,
81    /// Number of candidate pairs tested (coarse filter)
82    pub candidates_tested: usize,
83    /// Number of Newton-Raphson calls
84    pub newton_calls: usize,
85    /// Number of successful Newton convergences
86    pub newton_success: usize,
87    /// Number of matches inserted into pool
88    pub pool_insertions: usize,
89    /// Number of matches rejected by pool (error threshold)
90    pub pool_rejections_error: usize,
91    /// Number of matches rejected by pool (dedupe)
92    pub pool_rejections_dedupe: usize,
93    /// Number of matches evicted from pool
94    pub pool_evictions: usize,
95    /// Final pool size
96    pub pool_final_size: usize,
97    /// Final best error in pool
98    pub pool_best_error: f64,
99    /// Whether search exited early
100    pub early_exit: bool,
101}
102
103impl SearchStats {
104    pub fn new() -> Self {
105        Self::default()
106    }
107
108    /// Print stats to stdout
109    pub fn print(&self) {
110        println!();
111        println!("  === Search Statistics ===");
112        println!();
113        println!("  Generation:");
114        println!(
115            "    Time:            {:>10.3}ms",
116            self.gen_time.as_secs_f64() * 1000.0
117        );
118        println!("    LHS expressions: {:>10}", self.lhs_count);
119        println!("    RHS expressions: {:>10}", self.rhs_count);
120        println!();
121        println!("  Search:");
122        println!(
123            "    Time:            {:>10.3}ms",
124            self.search_time.as_secs_f64() * 1000.0
125        );
126        println!("    LHS tested:      {:>10}", self.lhs_tested);
127        println!("    Candidates:      {:>10}", self.candidates_tested);
128        println!("    Newton calls:    {:>10}", self.newton_calls);
129        println!(
130            "    Newton success:  {:>10} ({:.1}%)",
131            self.newton_success,
132            if self.newton_calls > 0 {
133                100.0 * self.newton_success as f64 / self.newton_calls as f64
134            } else {
135                0.0
136            }
137        );
138        if self.early_exit {
139            println!("    Early exit:      yes");
140        }
141        println!();
142        println!("  Pool:");
143        println!("    Insertions:      {:>10}", self.pool_insertions);
144        println!("    Rejected (err):  {:>10}", self.pool_rejections_error);
145        println!("    Rejected (dup):  {:>10}", self.pool_rejections_dedupe);
146        println!("    Evictions:       {:>10}", self.pool_evictions);
147        println!("    Final size:      {:>10}", self.pool_final_size);
148        println!("    Best error:      {:>14.2e}", self.pool_best_error);
149    }
150}
151
152/// Compute complexity limits from a search level.
153///
154/// This function provides a consistent mapping from integer search levels
155/// to complexity limits used by the library API (Python, WASM, and adaptive mode).
156///
157/// # Formula
158///
159/// - Base LHS complexity: 10
160/// - Base RHS complexity: 12
161/// - Level multiplier: 4
162/// - `max_lhs = 10 + 4 * level`
163/// - `max_rhs = 12 + 4 * level`
164///
165/// # Level Guidelines
166///
167/// | Level | LHS Max | RHS Max | Expression Count (approx) |
168/// |-------|---------|---------|---------------------------|
169/// | 0     | 10      | 12      | ~35K                      |
170/// | 1     | 14      | 16      | ~130K                     |
171/// | 2     | 18      | 20      | ~500K                     |
172/// | 3     | 22      | 24      | ~2M                       |
173/// | 5     | 30      | 32      | ~15M                      |
174///
175/// # Note
176///
177/// The CLI uses a different formula with higher bases (35/35) and multiplier (10)
178/// to match the original RIES command-line behavior. This function is for
179/// programmatic API consumers.
180#[inline]
181pub fn level_to_complexity(level: u32) -> (u32, u32) {
182    const BASE_LHS: u32 = 10;
183    const BASE_RHS: u32 = 12;
184    const LEVEL_MULTIPLIER: u32 = 4;
185
186    // Saturating arithmetic avoids panics/wraparound if an API caller passes
187    // an out-of-range level without validation.
188    let level_factor = LEVEL_MULTIPLIER.saturating_mul(level);
189    (
190        BASE_LHS.saturating_add(level_factor),
191        BASE_RHS.saturating_add(level_factor),
192    )
193}
194
195/// A matched equation
196#[derive(Clone, Debug)]
197pub struct Match {
198    /// Left-hand side expression (contains x)
199    pub lhs: EvaluatedExpr,
200    /// Right-hand side expression (constant)
201    pub rhs: EvaluatedExpr,
202    /// Solved value of x
203    pub x_value: f64,
204    /// Difference from target: x_value - target
205    pub error: f64,
206    /// Total complexity (LHS + RHS)
207    pub complexity: u32,
208}
209
210impl Match {
211    /// Format the match for display (used in tests)
212    #[cfg(test)]
213    pub fn display(&self, _target: f64) -> String {
214        let lhs_str = self.lhs.expr.to_infix();
215        let rhs_str = self.rhs.expr.to_infix();
216
217        let error_str = if self.error.abs() < EXACT_MATCH_TOLERANCE {
218            "('exact' match)".to_string()
219        } else {
220            let sign = if self.error >= 0.0 { "+" } else { "-" };
221            format!("for x = T {} {:.6e}", sign, self.error.abs())
222        };
223
224        format!(
225            "{:>24} = {:<24} {} {{{}}}",
226            lhs_str, rhs_str, error_str, self.complexity
227        )
228    }
229}
230
231/// Configuration for the search process.
232///
233/// Controls matching thresholds, result limits, symbol filtering, and search behavior.
234/// This struct is the main entry point for customizing how RIES searches for equations.
235///
236/// # Example
237///
238/// ```rust
239/// use ries_rs::search::SearchConfig;
240/// use ries_rs::pool::RankingMode;
241///
242/// let config = SearchConfig {
243///     target: std::f64::consts::PI,
244///     max_matches: 50,
245///     max_error: 1e-10,
246///     stop_at_exact: true,
247///     ranking_mode: RankingMode::Complexity,
248///     ..SearchConfig::default()
249/// };
250/// ```
251///
252/// # Fields Overview
253///
254/// - **Target**: `target` - the value to search for equations matching it
255/// - **Limits**: `max_matches`, `max_error` - control result quantity and quality
256/// - **Stopping**: `stop_at_exact`, `stop_below` - early termination conditions
257/// - **Refinement**: `newton_iterations`, `refine_with_newton`, `derivative_margin` - Newton-Raphson settings
258/// - **Filtering**: `zero_value_threshold`, `rhs_allowed_symbols`, `rhs_excluded_symbols` - prune unwanted results
259/// - **Extensions**: `user_constants`, `user_functions` - custom symbols
260/// - **Diagnostics**: `show_newton`, `show_match_checks`, etc. - debug output
261/// - **Ranking**: `ranking_mode` - how to order results
262#[derive(Clone, Debug)]
263pub struct SearchConfig {
264    /// Target value to find equations for.
265    ///
266    /// The search will find equations where LHS ≈ RHS ≈ target.
267    /// This is the number you're trying to "solve" or represent symbolically.
268    ///
269    /// Default: 0.0
270    pub target: f64,
271
272    /// Maximum number of matches to return in results.
273    ///
274    /// Once this many matches are found and confirmed, the pool will start
275    /// evicting lower-quality matches to make room for better ones.
276    ///
277    /// Default: 100
278    pub max_matches: usize,
279
280    /// Maximum acceptable error for a match to be included.
281    ///
282    /// Only expressions within this absolute error from the target are considered matches.
283    /// Smaller values give more precise but fewer results.
284    ///
285    /// Default: 1.0
286    pub max_error: f64,
287
288    /// Stop search when an exact match is found.
289    ///
290    /// When true and an expression matches within the exact match tolerance,
291    /// the search terminates early. Useful when you only need one good solution.
292    ///
293    /// Default: false
294    pub stop_at_exact: bool,
295
296    /// Stop search when error goes below this threshold.
297    ///
298    /// If set, the search will terminate once a match is found with error
299    /// below this value. Set to `Some(1e-12)` for high-precision early stopping.
300    ///
301    /// Default: None
302    pub stop_below: Option<f64>,
303
304    /// Threshold for pruning LHS expressions with near-zero values.
305    ///
306    /// LHS expressions with absolute values below this threshold are pruned
307    /// to prevent flooding results with trivial matches like `cospi(2.5) = 0`.
308    /// Set to 0.0 to disable this filtering.
309    ///
310    /// Default: 1e-4
311    pub zero_value_threshold: f64,
312
313    /// Maximum Newton-Raphson iterations for root refinement.
314    ///
315    /// Controls how many iterations are used to refine candidate solutions.
316    /// Higher values may find more precise roots but take longer.
317    ///
318    /// Default: 15
319    pub newton_iterations: usize,
320
321    /// User-defined constants for evaluation.
322    ///
323    /// Custom constants that can be used in expressions, defined via `-N\'name=value\'`.
324    /// Each constant has a name, value, and optional description.
325    ///
326    /// Default: empty
327    pub user_constants: Vec<UserConstant>,
328
329    /// User-defined functions for evaluation.
330    ///
331    /// Custom functions that can be used in expressions, defined via `-F\'name:formula\'`.
332    /// These extend the built-in functions (sin, cos, etc.).
333    ///
334    /// Default: empty
335    pub user_functions: Vec<crate::udf::UserFunction>,
336
337    /// Argument scale for `sinpi/cospi/tanpi` evaluation.
338    ///
339    /// The default is π, matching original `sinpi(x) = sin(πx)` semantics.
340    /// Override this for alternate trig conventions without relying on global state.
341    pub trig_argument_scale: f64,
342
343    /// Whether to refine candidate roots with Newton-Raphson iteration.
344    ///
345    /// When true, initial coarse matches are refined using Newton-Raphson
346    /// to find more precise solutions. Disable for faster but less precise search.
347    ///
348    /// Default: true
349    pub refine_with_newton: bool,
350
351    /// Optional RHS-only allowed symbol set.
352    ///
353    /// If set, all symbols used on the RHS must be in this set (specified as ASCII bytes).
354    /// This allows restricting RHS expressions to a subset of available symbols.
355    /// Combined with `rhs_excluded_symbols`, both filters apply.
356    ///
357    /// Default: None (all symbols allowed)
358    pub rhs_allowed_symbols: Option<HashSet<u8>>,
359
360    /// Optional RHS-only excluded symbol set.
361    ///
362    /// If set, RHS expressions using any of these symbols are rejected.
363    /// This allows excluding specific symbols from RHS expressions.
364    /// Combined with `rhs_allowed_symbols`, both filters apply.
365    ///
366    /// Default: None (no symbols excluded)
367    pub rhs_excluded_symbols: Option<HashSet<u8>>,
368
369    /// Show Newton-Raphson iteration diagnostic output.
370    ///
371    /// When true, prints detailed information about each Newton-Raphson iteration.
372    /// Enabled by `-Dn` command-line flag. Useful for debugging convergence issues.
373    ///
374    /// Default: false
375    pub show_newton: bool,
376
377    /// Show match check diagnostic output.
378    ///
379    /// When true, prints information about each candidate match evaluation.
380    /// Enabled by `-Do` command-line flag.
381    ///
382    /// Default: false
383    pub show_match_checks: bool,
384
385    /// Show pruned arithmetic diagnostic output.
386    ///
387    /// When true, prints information about arithmetic expressions that were pruned.
388    /// Enabled by `-DA` command-line flag.
389    ///
390    /// Default: false
391    #[allow(dead_code)]
392    pub show_pruned_arith: bool,
393
394    /// Show pruned range/zero diagnostic output.
395    ///
396    /// When true, prints information about expressions pruned due to range
397    /// constraints or near-zero values. Enabled by `-DB` command-line flag.
398    ///
399    /// Default: false
400    pub show_pruned_range: bool,
401
402    /// Show database adds diagnostic output.
403    ///
404    /// When true, prints information about expressions added to the database.
405    /// Enabled by `-DG` command-line flag.
406    ///
407    /// Default: false
408    pub show_db_adds: bool,
409
410    /// When true, matches must match all significant digits of the target.
411    ///
412    /// When enabled, the tolerance is computed based on the magnitude of the
413    /// target value to require full precision matching. The actual tolerance
414    /// is computed in `main.rs` and passed as `max_error`.
415    ///
416    /// Default: false
417    #[allow(dead_code)]
418    pub match_all_digits: bool,
419
420    /// Threshold below which derivative is considered degenerate in Newton-Raphson.
421    ///
422    /// If `|derivative| < derivative_margin` during Newton-Raphson iteration,
423    /// the refinement is skipped to avoid numerical instability and division
424    /// by near-zero values.
425    ///
426    /// Default: 1e-12 (from `DEGENERATE_DERIVATIVE` constant)
427    pub derivative_margin: f64,
428
429    /// Ranking mode for pool ordering and eviction.
430    ///
431    /// Determines how matches are ranked in the result pool:
432    /// - `Complexity`: Sort by expression complexity (simpler is better)
433    /// - `Error`: Sort by match error (more precise is better)
434    ///
435    /// Default: `RankingMode::Complexity`
436    pub ranking_mode: RankingMode,
437}
438
439impl Default for SearchConfig {
440    fn default() -> Self {
441        Self {
442            target: 0.0,
443            max_matches: 100,
444            max_error: 1.0,
445            stop_at_exact: false,
446            stop_below: None,
447            zero_value_threshold: 1e-4,
448            newton_iterations: 15,
449            user_constants: Vec::new(),
450            user_functions: Vec::new(),
451            trig_argument_scale: crate::eval::DEFAULT_TRIG_ARGUMENT_SCALE,
452            refine_with_newton: true,
453            rhs_allowed_symbols: None,
454            rhs_excluded_symbols: None,
455            show_newton: false,
456            show_match_checks: false,
457            show_pruned_arith: false,
458            show_pruned_range: false,
459            show_db_adds: false,
460            match_all_digits: false,
461            derivative_margin: DEGENERATE_DERIVATIVE,
462            ranking_mode: RankingMode::Complexity,
463        }
464    }
465}
466
467impl SearchConfig {
468    /// Build an explicit search context for this configuration.
469    pub fn context(&self) -> SearchContext<'_> {
470        SearchContext::new(self)
471    }
472
473    #[inline]
474    fn rhs_symbol_allowed(&self, rhs: &crate::expr::Expression) -> bool {
475        let symbols = rhs.symbols();
476
477        if let Some(allowed) = &self.rhs_allowed_symbols {
478            if symbols.iter().any(|s| !allowed.contains(&(*s as u8))) {
479                return false;
480            }
481        }
482
483        if let Some(excluded) = &self.rhs_excluded_symbols {
484            if symbols.iter().any(|s| excluded.contains(&(*s as u8))) {
485                return false;
486            }
487        }
488
489        true
490    }
491}
492
493/// Explicit per-run search context.
494#[derive(Clone, Copy, Debug)]
495pub struct SearchContext<'a> {
496    /// Immutable search configuration for this run.
497    pub config: &'a SearchConfig,
498    /// Evaluation context derived from the search configuration.
499    pub eval: EvalContext<'a>,
500}
501
502impl<'a> SearchContext<'a> {
503    pub fn new(config: &'a SearchConfig) -> Self {
504        Self {
505            config,
506            eval: EvalContext::from_slices(&config.user_constants, &config.user_functions)
507                .with_trig_argument_scale(config.trig_argument_scale),
508        }
509    }
510}
511
512/// Perform a complete search
513///
514/// This function is part of the public API for library consumers who want a simple
515/// search interface without statistics collection.
516#[allow(dead_code)]
517pub fn search(target: f64, gen_config: &crate::gen::GenConfig, max_matches: usize) -> Vec<Match> {
518    let (matches, _stats) = search_with_stats(target, gen_config, max_matches);
519    matches
520}
521
522/// Perform a complete search with statistics collection
523///
524/// This function is part of the public API for library consumers who want
525/// detailed statistics about the search process.
526#[allow(dead_code)]
527pub fn search_with_stats(
528    target: f64,
529    gen_config: &crate::gen::GenConfig,
530    max_matches: usize,
531) -> (Vec<Match>, SearchStats) {
532    search_with_stats_and_options(target, gen_config, max_matches, false, None)
533}
534
535/// Perform a complete search with statistics collection and early exit options
536pub fn search_with_stats_and_options(
537    target: f64,
538    gen_config: &crate::gen::GenConfig,
539    max_matches: usize,
540    stop_at_exact: bool,
541    stop_below: Option<f64>,
542) -> (Vec<Match>, SearchStats) {
543    if !target.is_finite() {
544        return (Vec::new(), SearchStats::default());
545    }
546    let config = SearchConfig {
547        target,
548        max_matches,
549        stop_at_exact,
550        stop_below,
551        user_constants: gen_config.user_constants.clone(),
552        user_functions: gen_config.user_functions.clone(),
553        ..Default::default()
554    };
555
556    search_with_stats_and_config(gen_config, &config)
557}
558
559/// Perform a complete search with a fully specified search configuration.
560///
561/// This function includes a safety fallback: if expression generation would
562/// exceed ~2M expressions (which would risk OOM), it automatically switches
563/// to streaming mode to avoid memory exhaustion.
564pub fn search_with_stats_and_config(
565    gen_config: &crate::gen::GenConfig,
566    config: &SearchConfig,
567) -> (Vec<Match>, SearchStats) {
568    if !config.target.is_finite() {
569        return (Vec::new(), SearchStats::default());
570    }
571
572    use crate::gen::generate_all_with_limit_and_context;
573
574    const MAX_EXPRESSIONS_BEFORE_STREAMING: usize = 2_000_000;
575    let context = SearchContext::new(config);
576
577    let gen_start = SearchTimer::start();
578
579    // Try bounded generation first — if limit exceeded, fall back to streaming
580    if let Some(generated) = generate_all_with_limit_and_context(
581        gen_config,
582        config.target,
583        &context.eval,
584        MAX_EXPRESSIONS_BEFORE_STREAMING,
585    ) {
586        let gen_time = gen_start.elapsed();
587
588        // Build database
589        let mut db = ExprDatabase::new();
590        db.insert_rhs(generated.rhs);
591
592        // Find matches with stats
593        let (matches, mut stats) = db.find_matches_with_stats_and_context(&generated.lhs, &context);
594
595        // Add generation stats
596        stats.gen_time = gen_time;
597        stats.lhs_count = generated.lhs.len();
598        stats.rhs_count = db.rhs_count();
599
600        (matches, stats)
601    } else {
602        // Limit exceeded — fall back to streaming mode which avoids OOM
603        search_streaming_with_config(gen_config, config)
604    }
605}
606
607// =============================================================================
608// ADAPTIVE SEARCH - Iterative LHS/RHS expansion like original RIES
609// =============================================================================
610
611/// Perform an adaptive search that iteratively expands LHS/RHS complexity
612///
613/// This implements the original RIES algorithm where expressions are generated
614/// incrementally, expanding the side (LHS or RHS) that has fewer expressions.
615/// This ensures balanced growth and matches the original's expression counts.
616///
617/// # Algorithm
618///
619/// 1. Start with minimal complexity limits (LHS: 1, RHS: 1)
620/// 2. Generate expressions at current complexity
621/// 3. Track how many expressions were generated for each side
622/// 4. Expand the side with fewer expressions (increase complexity by 1)
623/// 5. Repeat until target expression count is reached
624/// 6. Then perform matching on all generated expressions
625///
626/// # Advantages
627///
628/// - Matches original RIES behavior exactly
629/// - Generates similar number of expressions as original
630/// - Better memory efficiency than generating all at once
631/// - Can leverage parallelization more effectively
632///
633/// # Expression Count Formula
634///
635/// Target expressions = 2000 × 4^(2 + level)
636/// - Level 0: ~32,000 expressions
637/// - Level 1: ~128,000 expressions
638/// - Level 2: ~512,000 expressions
639/// - Level 3: ~2,048,000 expressions
640pub fn search_adaptive(
641    base_config: &crate::gen::GenConfig,
642    search_config: &SearchConfig,
643    level: u32,
644) -> (Vec<Match>, SearchStats) {
645    use crate::expr::EvaluatedExpr;
646    use crate::gen::{quantize_value, LhsKey};
647    use std::collections::HashMap;
648
649    let gen_start = SearchTimer::start();
650    let context = SearchContext::new(search_config);
651    // Use HashMap so dedup keeps the simplest expression, not first-seen.
652    // With parallel generation the arrival order is non-deterministic, so
653    // first-seen could discard a simpler equivalent expression.
654    let mut lhs_map: HashMap<LhsKey, EvaluatedExpr> = HashMap::new();
655    let mut rhs_map: HashMap<i64, EvaluatedExpr> = HashMap::new();
656
657    // For adaptive mode, use the standard complexity formula
658    // See level_to_complexity() for the standard mapping
659    let (std_lhs, std_rhs) = level_to_complexity(level);
660
661    let mut config = base_config.clone();
662    config.max_lhs_complexity = std_lhs.max(base_config.max_lhs_complexity);
663    config.max_rhs_complexity = std_rhs.max(base_config.max_rhs_complexity);
664
665    // Generate all expressions at the calculated complexity
666    // Use parallel generation when available for significantly better performance
667    let generated = {
668        #[cfg(feature = "parallel")]
669        {
670            crate::gen::generate_all_parallel_with_context(
671                &config,
672                search_config.target,
673                &context.eval,
674            )
675        }
676        #[cfg(not(feature = "parallel"))]
677        {
678            crate::gen::generate_all_with_context(&config, search_config.target, &context.eval)
679        }
680    };
681
682    // Deduplicate LHS expressions, keeping the simplest equivalent
683    for expr in generated.lhs {
684        let key = (quantize_value(expr.value), quantize_value(expr.derivative));
685        lhs_map
686            .entry(key)
687            .and_modify(|existing| {
688                if expr.expr.complexity() < existing.expr.complexity() {
689                    *existing = expr.clone();
690                }
691            })
692            .or_insert(expr);
693    }
694
695    // Deduplicate RHS expressions, keeping the simplest equivalent
696    for expr in generated.rhs {
697        let key = quantize_value(expr.value);
698        rhs_map
699            .entry(key)
700            .and_modify(|existing| {
701                if expr.expr.complexity() < existing.expr.complexity() {
702                    *existing = expr.clone();
703                }
704            })
705            .or_insert(expr);
706    }
707
708    let all_lhs: Vec<EvaluatedExpr> = lhs_map.into_values().collect();
709    let all_rhs: Vec<EvaluatedExpr> = rhs_map.into_values().collect();
710
711    let gen_time = gen_start.elapsed();
712
713    // Now perform the actual matching
714    let mut db = ExprDatabase::new();
715    db.insert_rhs(all_rhs);
716
717    let search_start = SearchTimer::start();
718    let (matches, match_stats) = db.find_matches_with_stats_and_context(&all_lhs, &context);
719    let search_time = search_start.elapsed();
720
721    // Combine stats
722    let mut stats = SearchStats::new();
723    stats.gen_time = gen_time;
724    stats.search_time = search_time;
725    stats.lhs_count = all_lhs.len();
726    stats.rhs_count = db.rhs_count();
727    stats.lhs_tested = match_stats.lhs_tested;
728    stats.candidates_tested = match_stats.candidates_tested;
729    stats.newton_calls = match_stats.newton_calls;
730    stats.newton_success = match_stats.newton_success;
731    stats.pool_insertions = match_stats.pool_insertions;
732    stats.pool_rejections_error = match_stats.pool_rejections_error;
733    stats.pool_rejections_dedupe = match_stats.pool_rejections_dedupe;
734    stats.pool_evictions = match_stats.pool_evictions;
735    stats.pool_final_size = match_stats.pool_final_size;
736    stats.pool_best_error = match_stats.pool_best_error;
737    stats.early_exit = match_stats.early_exit;
738
739    (matches, stats)
740}
741
742// =============================================================================
743// STREAMING SEARCH - Memory-efficient search for high complexity levels
744// =============================================================================
745
746/// Perform a streaming search that processes expressions as they're generated
747///
748/// This is the memory-efficient version of search that builds the RHS database
749/// incrementally. Instead of generating ALL expressions into memory first,
750/// it processes RHS expressions immediately and matches LHS expressions as
751/// they arrive.
752///
753/// # Memory Efficiency
754///
755/// - Traditional: O(expressions) memory - all expressions stored
756/// - Streaming: O(database) memory - only RHS database stored, LHS processed on-the-fly
757///
758/// # When to Use
759///
760/// Use streaming search when:
761/// - Complexity levels are high (L4+, where expressions count > 10M)
762/// - Memory is limited
763/// - You want to see results progressively
764///
765/// # Example
766///
767/// ```no_run
768/// use ries_rs::gen::GenConfig;
769/// use ries_rs::search::search_streaming;
770/// let config = GenConfig::default();
771/// let (matches, stats) = search_streaming(2.5, &config, 100, false, None);
772/// ```
773#[allow(dead_code)]
774pub fn search_streaming(
775    target: f64,
776    gen_config: &crate::gen::GenConfig,
777    max_matches: usize,
778    stop_at_exact: bool,
779    stop_below: Option<f64>,
780) -> (Vec<Match>, SearchStats) {
781    let config = SearchConfig {
782        target,
783        max_matches,
784        stop_at_exact,
785        stop_below,
786        user_constants: gen_config.user_constants.clone(),
787        user_functions: gen_config.user_functions.clone(),
788        ..Default::default()
789    };
790
791    search_streaming_with_config(gen_config, &config)
792}
793
794/// Perform a streaming search with a fully specified search configuration.
795pub fn search_streaming_with_config(
796    gen_config: &crate::gen::GenConfig,
797    search_config: &SearchConfig,
798) -> (Vec<Match>, SearchStats) {
799    use crate::gen::{generate_streaming_with_context, StreamingCallbacks};
800    use std::collections::HashMap;
801
802    let gen_start = SearchTimer::start();
803    let mut stats = SearchStats::new();
804    let context = SearchContext::new(search_config);
805
806    // Respect configured max error (with a tiny floor for numerical stability)
807    let initial_max_error = search_config.max_error.max(1e-12);
808
809    // Create bounded pool with configured capacity
810    let mut pool = TopKPool::new_with_diagnostics(
811        search_config.max_matches,
812        initial_max_error,
813        search_config.show_db_adds,
814        search_config.ranking_mode,
815    );
816
817    // Build RHS database first using streaming
818    let mut rhs_db = TieredExprDatabase::new();
819    let mut rhs_map: HashMap<i64, crate::expr::EvaluatedExpr> = HashMap::new();
820
821    // Collect LHS expressions to process later
822    let mut lhs_exprs: Vec<crate::expr::EvaluatedExpr> = Vec::new();
823
824    // First pass: collect all RHS and LHS via streaming
825    {
826        let mut callbacks = StreamingCallbacks {
827            on_rhs: &mut |expr| {
828                // Deduplicate by value, keeping simplest.
829                // Use quantize_value (not inline * 1e8) to avoid i64 overflow for large values.
830                let key = crate::gen::quantize_value(expr.value);
831                rhs_map
832                    .entry(key)
833                    .and_modify(|existing| {
834                        if expr.expr.complexity() < existing.expr.complexity() {
835                            *existing = expr.clone();
836                        }
837                    })
838                    .or_insert_with(|| expr.clone());
839                true
840            },
841            on_lhs: &mut |expr| {
842                lhs_exprs.push(expr.clone());
843                true
844            },
845        };
846
847        // Generate with streaming callbacks
848        generate_streaming_with_context(
849            gen_config,
850            search_config.target,
851            &context.eval,
852            &mut callbacks,
853        );
854    }
855
856    // Build the tiered database from deduplicated RHS
857    for expr in rhs_map.into_values() {
858        rhs_db.insert(expr);
859    }
860    rhs_db.finalize();
861
862    stats.rhs_count = rhs_db.total_count();
863    stats.lhs_count = lhs_exprs.len();
864    stats.gen_time = gen_start.elapsed();
865
866    // Now match LHS expressions against the RHS database
867    let search_start = SearchTimer::start();
868
869    // Sort LHS by complexity so simpler expressions are processed first
870    lhs_exprs.sort_by_key(|e| e.expr.complexity());
871
872    // Early exit tracking
873    let mut early_exit = false;
874
875    'outer: for lhs in &lhs_exprs {
876        if early_exit {
877            break;
878        }
879
880        // Skip LHS with value too close to 0
881        if lhs.value.abs() < search_config.zero_value_threshold {
882            if search_config.show_pruned_range {
883                eprintln!(
884                    "  [pruned range] value={:.6e} reason=\"near-zero\" expr=\"{}\"",
885                    lhs.value,
886                    lhs.expr.to_infix()
887                );
888            }
889            continue;
890        }
891
892        // Skip degenerate expressions
893        if lhs.derivative.abs() < DEGENERATE_TEST_THRESHOLD {
894            let test_x = search_config.target + std::f64::consts::E;
895            // Use the full evaluator (including user_functions) so that UDF-containing
896            // expressions are not silently skipped due to evaluate_with_constants
897            // returning Err for user-function symbols.
898            if let Ok(test_result) =
899                crate::eval::evaluate_fast_with_context(&lhs.expr, test_x, &context.eval)
900            {
901                let value_unchanged =
902                    (test_result.value - lhs.value).abs() < DEGENERATE_TEST_THRESHOLD;
903                let deriv_still_zero = test_result.derivative.abs() < DEGENERATE_TEST_THRESHOLD;
904                if deriv_still_zero || value_unchanged {
905                    continue;
906                }
907            }
908
909            // Check for value match
910            stats.lhs_tested += 1;
911            for rhs in rhs_db.iter_tiers_in_range(
912                lhs.value - DEGENERATE_RANGE_TOLERANCE,
913                lhs.value + DEGENERATE_RANGE_TOLERANCE,
914            ) {
915                if !search_config.rhs_symbol_allowed(&rhs.expr) {
916                    continue;
917                }
918                stats.candidates_tested += 1;
919                if search_config.show_match_checks {
920                    eprintln!(
921                        "  [match] checking lhs={:.6} rhs={:.6}",
922                        lhs.value, rhs.value
923                    );
924                }
925                let val_diff = (lhs.value - rhs.value).abs();
926                if val_diff < DEGENERATE_RANGE_TOLERANCE && pool.would_accept(0.0, true) {
927                    let m = Match {
928                        lhs: lhs.clone(),
929                        rhs: rhs.clone(),
930                        x_value: search_config.target,
931                        error: 0.0,
932                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
933                    };
934                    pool.try_insert(m);
935                }
936            }
937            continue;
938        }
939
940        stats.lhs_tested += 1;
941
942        // Use adaptive search radius
943        let search_radius = calculate_adaptive_search_radius(
944            lhs.derivative,
945            lhs.expr.complexity(),
946            pool.len(),
947            search_config.max_matches,
948            pool.best_error,
949        );
950        let low = lhs.value - search_radius;
951        let high = lhs.value + search_radius;
952
953        // Search using tiered iterator (lower tiers first)
954        for rhs in rhs_db.iter_tiers_in_range(low, high) {
955            if !search_config.rhs_symbol_allowed(&rhs.expr) {
956                continue;
957            }
958            stats.candidates_tested += 1;
959            if search_config.show_match_checks {
960                eprintln!(
961                    "  [match] checking lhs={:.6} rhs={:.6}",
962                    lhs.value, rhs.value
963                );
964            }
965
966            // Compute initial error estimate (coarse filter)
967            let val_diff = lhs.value - rhs.value;
968            let x_delta = -val_diff / lhs.derivative;
969            let coarse_error = x_delta.abs();
970
971            // Skip if coarse estimate won't pass threshold
972            let is_potentially_exact = coarse_error < NEWTON_FINAL_TOLERANCE;
973            if !pool.would_accept_strict(coarse_error, is_potentially_exact) {
974                continue;
975            }
976
977            if !search_config.refine_with_newton {
978                let refined_x = search_config.target + x_delta;
979                let refined_error = x_delta;
980                let is_exact = refined_error.abs() < EXACT_MATCH_TOLERANCE;
981
982                if pool.would_accept(refined_error.abs(), is_exact) {
983                    let m = Match {
984                        lhs: lhs.clone(),
985                        rhs: rhs.clone(),
986                        x_value: refined_x,
987                        error: refined_error,
988                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
989                    };
990
991                    pool.try_insert(m);
992
993                    if search_config.stop_at_exact && is_exact {
994                        early_exit = true;
995                        break 'outer;
996                    }
997                    if let Some(threshold) = search_config.stop_below {
998                        if refined_error.abs() < threshold {
999                            early_exit = true;
1000                            break 'outer;
1001                        }
1002                    }
1003                }
1004                continue;
1005            }
1006
1007            // Refine with Newton-Raphson
1008            stats.newton_calls += 1;
1009            if let Some(refined_x) = newton_raphson_with_constants(
1010                &lhs.expr,
1011                rhs.value,
1012                search_config.target,
1013                search_config.newton_iterations,
1014                &context.eval,
1015                search_config.show_newton,
1016                search_config.derivative_margin,
1017            ) {
1018                stats.newton_success += 1;
1019                let refined_error = refined_x - search_config.target;
1020                let is_exact = refined_error.abs() < EXACT_MATCH_TOLERANCE;
1021
1022                // Check if this is acceptable
1023                if pool.would_accept(refined_error.abs(), is_exact) {
1024                    let m = Match {
1025                        lhs: lhs.clone(),
1026                        rhs: rhs.clone(),
1027                        x_value: refined_x,
1028                        error: refined_error,
1029                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
1030                    };
1031
1032                    // Insert into pool
1033                    pool.try_insert(m);
1034
1035                    // Check early exit conditions
1036                    if search_config.stop_at_exact && is_exact {
1037                        early_exit = true;
1038                        break 'outer;
1039                    }
1040                    if let Some(threshold) = search_config.stop_below {
1041                        if refined_error.abs() < threshold {
1042                            early_exit = true;
1043                            break 'outer;
1044                        }
1045                    }
1046                }
1047            }
1048        }
1049    }
1050
1051    // Collect pool stats
1052    stats.pool_insertions = pool.stats.insertions;
1053    stats.pool_rejections_error = pool.stats.rejections_error;
1054    stats.pool_rejections_dedupe = pool.stats.rejections_dedupe;
1055    stats.pool_evictions = pool.stats.evictions;
1056    stats.pool_final_size = pool.len();
1057    stats.pool_best_error = pool.best_error;
1058    stats.search_time = search_start.elapsed();
1059    stats.early_exit = early_exit;
1060
1061    (pool.into_sorted(), stats)
1062}
1063
1064/// Perform one-sided search: generate RHS expressions only and match `x = RHS`.
1065pub fn search_one_sided_with_stats_and_config(
1066    gen_config: &crate::gen::GenConfig,
1067    config: &SearchConfig,
1068) -> (Vec<Match>, SearchStats) {
1069    use crate::eval::evaluate_with_context;
1070    use crate::expr::Expression;
1071    use crate::gen::generate_all_with_context;
1072    use crate::symbol::Symbol;
1073
1074    let gen_start = SearchTimer::start();
1075    let context = SearchContext::new(config);
1076
1077    let mut rhs_only = gen_config.clone();
1078    rhs_only.generate_lhs = false;
1079    rhs_only.generate_rhs = true;
1080
1081    let generated = generate_all_with_context(&rhs_only, config.target, &context.eval);
1082    let gen_time = gen_start.elapsed();
1083
1084    let search_start = SearchTimer::start();
1085    let initial_max_error = config.max_error.max(1e-12);
1086    let mut pool = TopKPool::new_with_diagnostics(
1087        config.max_matches,
1088        initial_max_error,
1089        config.show_db_adds,
1090        config.ranking_mode,
1091    );
1092    let mut stats = SearchStats::new();
1093    let mut early_exit = false;
1094
1095    let mut lhs_expr = Expression::new();
1096    lhs_expr.push_with_table(Symbol::X, &gen_config.symbol_table);
1097    let lhs_eval = evaluate_with_context(&lhs_expr, config.target, &context.eval);
1098    let lhs_eval = match lhs_eval {
1099        Ok(v) => v,
1100        Err(_) => {
1101            stats.gen_time = gen_time;
1102            stats.search_time = search_start.elapsed();
1103            return (Vec::new(), stats);
1104        }
1105    };
1106    let lhs = EvaluatedExpr::new(
1107        lhs_expr,
1108        lhs_eval.value,
1109        lhs_eval.derivative,
1110        lhs_eval.num_type,
1111    );
1112
1113    stats.lhs_count = 1;
1114    stats.rhs_count = generated.rhs.len();
1115    stats.lhs_tested = 1;
1116
1117    for rhs in generated.rhs {
1118        if !config.rhs_symbol_allowed(&rhs.expr) {
1119            continue;
1120        }
1121        stats.candidates_tested += 1;
1122        if config.show_match_checks {
1123            eprintln!(
1124                "  [match] checking lhs={:.6} rhs={:.6}",
1125                lhs.value, rhs.value
1126            );
1127        }
1128
1129        let error = rhs.value - config.target;
1130        let is_exact = error.abs() < EXACT_MATCH_TOLERANCE;
1131        if !pool.would_accept(error.abs(), is_exact) {
1132            continue;
1133        }
1134
1135        let m = Match {
1136            lhs: lhs.clone(),
1137            rhs: rhs.clone(),
1138            x_value: rhs.value,
1139            error,
1140            complexity: lhs.expr.complexity() + rhs.expr.complexity(),
1141        };
1142
1143        pool.try_insert(m);
1144
1145        if config.stop_at_exact && is_exact {
1146            early_exit = true;
1147            break;
1148        }
1149        if let Some(threshold) = config.stop_below {
1150            if error.abs() < threshold {
1151                early_exit = true;
1152                break;
1153            }
1154        }
1155    }
1156
1157    stats.pool_insertions = pool.stats.insertions;
1158    stats.pool_rejections_error = pool.stats.rejections_error;
1159    stats.pool_rejections_dedupe = pool.stats.rejections_dedupe;
1160    stats.pool_evictions = pool.stats.evictions;
1161    stats.pool_final_size = pool.len();
1162    stats.pool_best_error = pool.best_error;
1163    stats.gen_time = gen_time;
1164    stats.search_time = search_start.elapsed();
1165    stats.early_exit = early_exit;
1166
1167    (pool.into_sorted(), stats)
1168}
1169
1170/// Perform a parallel search using Rayon
1171///
1172/// This function is part of the public API for library consumers who want
1173/// parallel search without statistics collection.
1174#[cfg(feature = "parallel")]
1175#[allow(dead_code)]
1176pub fn search_parallel(
1177    target: f64,
1178    gen_config: &crate::gen::GenConfig,
1179    max_matches: usize,
1180) -> Vec<Match> {
1181    let (matches, _stats) = search_parallel_with_stats(target, gen_config, max_matches);
1182    matches
1183}
1184
1185/// Perform a parallel search with statistics collection
1186///
1187/// This function is part of the public API for library consumers who want
1188/// detailed statistics about the parallel search process.
1189#[cfg(feature = "parallel")]
1190#[allow(dead_code)]
1191pub fn search_parallel_with_stats(
1192    target: f64,
1193    gen_config: &crate::gen::GenConfig,
1194    max_matches: usize,
1195) -> (Vec<Match>, SearchStats) {
1196    search_parallel_with_stats_and_options(target, gen_config, max_matches, false, None)
1197}
1198
1199/// Perform a parallel search with statistics collection and early exit options
1200#[cfg(feature = "parallel")]
1201pub fn search_parallel_with_stats_and_options(
1202    target: f64,
1203    gen_config: &crate::gen::GenConfig,
1204    max_matches: usize,
1205    stop_at_exact: bool,
1206    stop_below: Option<f64>,
1207) -> (Vec<Match>, SearchStats) {
1208    let config = SearchConfig {
1209        target,
1210        max_matches,
1211        stop_at_exact,
1212        stop_below,
1213        user_constants: gen_config.user_constants.clone(),
1214        user_functions: gen_config.user_functions.clone(),
1215        ..Default::default()
1216    };
1217
1218    search_parallel_with_stats_and_config(gen_config, &config)
1219}
1220
1221/// Perform a parallel search with a fully specified search configuration.
1222///
1223/// This function includes a safety fallback: if expression generation would
1224/// exceed ~2M expressions (which would risk OOM), it automatically switches
1225/// to streaming mode to avoid memory exhaustion.
1226#[cfg(feature = "parallel")]
1227pub fn search_parallel_with_stats_and_config(
1228    gen_config: &crate::gen::GenConfig,
1229    config: &SearchConfig,
1230) -> (Vec<Match>, SearchStats) {
1231    use crate::gen::generate_all_with_limit_and_context;
1232
1233    const MAX_EXPRESSIONS_BEFORE_STREAMING: usize = 2_000_000;
1234    let context = SearchContext::new(config);
1235
1236    let gen_start = SearchTimer::start();
1237
1238    // Try bounded generation first — if limit exceeded, fall back to streaming
1239    if let Some(generated) = generate_all_with_limit_and_context(
1240        gen_config,
1241        config.target,
1242        &context.eval,
1243        MAX_EXPRESSIONS_BEFORE_STREAMING,
1244    ) {
1245        let gen_time = gen_start.elapsed();
1246
1247        // Build database
1248        let mut db = ExprDatabase::new();
1249        db.insert_rhs(generated.rhs);
1250
1251        // Find matches with stats
1252        let (matches, mut stats) = db.find_matches_with_stats_and_context(&generated.lhs, &context);
1253
1254        // Add generation stats
1255        stats.gen_time = gen_time;
1256        stats.lhs_count = generated.lhs.len();
1257        stats.rhs_count = db.rhs_count();
1258
1259        (matches, stats)
1260    } else {
1261        // Limit exceeded — fall back to streaming mode which avoids OOM
1262        search_streaming_with_config(gen_config, config)
1263    }
1264}
ries_rs/search.rs

ries_rs/
search.rs