ries_rs/
search.rs

1//! Search and matching algorithms
2//!
3//! Finds equations by matching LHS and RHS expressions.
4
5use crate::eval::EvalContext;
6use crate::expr::EvaluatedExpr;
7use crate::pool::{RankingMode, TopKPool};
8use crate::profile::UserConstant;
9use crate::thresholds::{
10    DEGENERATE_DERIVATIVE, DEGENERATE_RANGE_TOLERANCE, DEGENERATE_TEST_THRESHOLD,
11    EXACT_MATCH_TOLERANCE, NEWTON_FINAL_TOLERANCE,
12};
13use std::collections::HashSet;
14use std::time::Duration;
15
16mod db;
17mod newton;
18#[cfg(test)]
19mod tests;
20
21use db::calculate_adaptive_search_radius;
22pub use db::{ComplexityTier, ExprDatabase, TieredExprDatabase};
23#[cfg(test)]
24use newton::newton_raphson;
25use newton::newton_raphson_with_constants;
26
27#[derive(Clone, Copy, Debug)]
28struct SearchTimer {
29    #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
30    start_ms: f64,
31    #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
32    start: std::time::Instant,
33}
34
35impl SearchTimer {
36    #[inline]
37    fn start() -> Self {
38        #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
39        {
40            Self {
41                start_ms: js_sys::Date::now(),
42            }
43        }
44
45        #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
46        {
47            Self {
48                start: std::time::Instant::now(),
49            }
50        }
51    }
52
53    #[inline]
54    fn elapsed(self) -> Duration {
55        #[cfg(all(target_arch = "wasm32", feature = "wasm"))]
56        {
57            let elapsed_ms = (js_sys::Date::now() - self.start_ms).max(0.0);
58            Duration::from_secs_f64(elapsed_ms / 1000.0)
59        }
60
61        #[cfg(not(all(target_arch = "wasm32", feature = "wasm")))]
62        {
63            self.start.elapsed()
64        }
65    }
66}
67
68/// Statistics collected during search
69#[derive(Clone, Debug, Default)]
70pub struct SearchStats {
71    /// Time spent generating expressions
72    pub gen_time: Duration,
73    /// Time spent searching/matching
74    pub search_time: Duration,
75    /// Number of LHS expressions generated
76    pub lhs_count: usize,
77    /// Number of RHS expressions generated
78    pub rhs_count: usize,
79    /// Number of LHS expressions tested (after filtering)
80    pub lhs_tested: usize,
81    /// Number of candidate pairs tested (coarse filter)
82    pub candidates_tested: usize,
83    /// Number of Newton-Raphson calls
84    pub newton_calls: usize,
85    /// Number of successful Newton convergences
86    pub newton_success: usize,
87    /// Number of matches inserted into pool
88    pub pool_insertions: usize,
89    /// Number of matches rejected by pool (error threshold)
90    pub pool_rejections_error: usize,
91    /// Number of matches rejected by pool (dedupe)
92    pub pool_rejections_dedupe: usize,
93    /// Number of matches evicted from pool
94    pub pool_evictions: usize,
95    /// Final pool size
96    pub pool_final_size: usize,
97    /// Final best error in pool
98    pub pool_best_error: f64,
99    /// Whether search exited early
100    pub early_exit: bool,
101}
102
103impl SearchStats {
104    pub fn new() -> Self {
105        Self::default()
106    }
107
108    /// Print stats to stdout
109    pub fn print(&self) {
110        println!();
111        println!("  === Search Statistics ===");
112        println!();
113        println!("  Generation:");
114        println!(
115            "    Time:            {:>10.3}ms",
116            self.gen_time.as_secs_f64() * 1000.0
117        );
118        println!("    LHS expressions: {:>10}", self.lhs_count);
119        println!("    RHS expressions: {:>10}", self.rhs_count);
120        println!();
121        println!("  Search:");
122        println!(
123            "    Time:            {:>10.3}ms",
124            self.search_time.as_secs_f64() * 1000.0
125        );
126        println!("    LHS tested:      {:>10}", self.lhs_tested);
127        println!("    Candidates:      {:>10}", self.candidates_tested);
128        println!("    Newton calls:    {:>10}", self.newton_calls);
129        println!(
130            "    Newton success:  {:>10} ({:.1}%)",
131            self.newton_success,
132            if self.newton_calls > 0 {
133                100.0 * self.newton_success as f64 / self.newton_calls as f64
134            } else {
135                0.0
136            }
137        );
138        if self.early_exit {
139            println!("    Early exit:      yes");
140        }
141        println!();
142        println!("  Pool:");
143        println!("    Insertions:      {:>10}", self.pool_insertions);
144        println!("    Rejected (err):  {:>10}", self.pool_rejections_error);
145        println!("    Rejected (dup):  {:>10}", self.pool_rejections_dedupe);
146        println!("    Evictions:       {:>10}", self.pool_evictions);
147        println!("    Final size:      {:>10}", self.pool_final_size);
148        println!("    Best error:      {:>14.2e}", self.pool_best_error);
149    }
150}
151
152/// Compute complexity limits from a search level.
153///
154/// This function provides a consistent mapping from integer search levels
155/// to complexity limits used by the library API (Python, WASM, and adaptive mode).
156///
157/// # Formula
158///
159/// - Base LHS complexity: 10
160/// - Base RHS complexity: 12
161/// - Level multiplier: 4
162/// - `max_lhs = 10 + 4 * level`
163/// - `max_rhs = 12 + 4 * level`
164///
165/// # Level Guidelines
166///
167/// | Level | LHS Max | RHS Max | Expression Count (approx) |
168/// |-------|---------|---------|---------------------------|
169/// | 0     | 10      | 12      | ~35K                      |
170/// | 1     | 14      | 16      | ~130K                     |
171/// | 2     | 18      | 20      | ~500K                     |
172/// | 3     | 22      | 24      | ~2M                       |
173/// | 5     | 30      | 32      | ~15M                      |
174///
175/// # Note
176///
177/// The CLI uses a different formula with higher bases (35/35) and multiplier (10)
178/// to match the original RIES command-line behavior. This function is for
179/// programmatic API consumers.
180#[inline]
181pub fn level_to_complexity(level: u32) -> (u32, u32) {
182    const BASE_LHS: u32 = 10;
183    const BASE_RHS: u32 = 12;
184    const LEVEL_MULTIPLIER: u32 = 4;
185
186    // Saturating arithmetic avoids panics/wraparound if an API caller passes
187    // an out-of-range level without validation.
188    let level_factor = LEVEL_MULTIPLIER.saturating_mul(level);
189    (
190        BASE_LHS.saturating_add(level_factor),
191        BASE_RHS.saturating_add(level_factor),
192    )
193}
194
195/// A matched equation
196#[derive(Clone, Debug)]
197pub struct Match {
198    /// Left-hand side expression (contains x)
199    pub lhs: EvaluatedExpr,
200    /// Right-hand side expression (constant)
201    pub rhs: EvaluatedExpr,
202    /// Solved value of x
203    pub x_value: f64,
204    /// Difference from target: x_value - target
205    pub error: f64,
206    /// Total complexity (LHS + RHS)
207    pub complexity: u32,
208}
209
210impl Match {
211    /// Format the match for display (used in tests)
212    #[cfg(test)]
213    pub fn display(&self, _target: f64) -> String {
214        let lhs_str = self.lhs.expr.to_infix();
215        let rhs_str = self.rhs.expr.to_infix();
216
217        let error_str = if self.error.abs() < EXACT_MATCH_TOLERANCE {
218            "('exact' match)".to_string()
219        } else {
220            let sign = if self.error >= 0.0 { "+" } else { "-" };
221            format!("for x = T {} {:.6e}", sign, self.error.abs())
222        };
223
224        format!(
225            "{:>24} = {:<24} {} {{{}}}",
226            lhs_str, rhs_str, error_str, self.complexity
227        )
228    }
229}
230
231/// Configuration for the search process.
232///
233/// Controls matching thresholds, result limits, symbol filtering, and search behavior.
234/// This struct is the main entry point for customizing how RIES searches for equations.
235///
236/// # Example
237///
238/// ```rust
239/// use ries_rs::search::SearchConfig;
240/// use ries_rs::pool::RankingMode;
241///
242/// let config = SearchConfig {
243///     target: std::f64::consts::PI,
244///     max_matches: 50,
245///     max_error: 1e-10,
246///     stop_at_exact: true,
247///     ranking_mode: RankingMode::Complexity,
248///     ..SearchConfig::default()
249/// };
250/// ```
251///
252/// # Fields Overview
253///
254/// - **Target**: `target` - the value to search for equations matching it
255/// - **Limits**: `max_matches`, `max_error` - control result quantity and quality
256/// - **Stopping**: `stop_at_exact`, `stop_below` - early termination conditions
257/// - **Refinement**: `newton_iterations`, `refine_with_newton`, `derivative_margin` - Newton-Raphson settings
258/// - **Filtering**: `zero_value_threshold`, `rhs_allowed_symbols`, `rhs_excluded_symbols` - prune unwanted results
259/// - **Extensions**: `user_constants`, `user_functions` - custom symbols
260/// - **Diagnostics**: `show_newton`, `show_match_checks`, etc. - debug output
261/// - **Ranking**: `ranking_mode` - how to order results
262#[derive(Clone, Debug)]
263pub struct SearchConfig {
264    /// Target value to find equations for.
265    ///
266    /// The search will find equations where LHS ≈ RHS ≈ target.
267    /// This is the number you're trying to "solve" or represent symbolically.
268    ///
269    /// Default: 0.0
270    pub target: f64,
271
272    /// Maximum number of matches to return in results.
273    ///
274    /// Once this many matches are found and confirmed, the pool will start
275    /// evicting lower-quality matches to make room for better ones.
276    ///
277    /// Default: 100
278    pub max_matches: usize,
279
280    /// Maximum acceptable error for a match to be included.
281    ///
282    /// Only expressions within this absolute error from the target are considered matches.
283    /// Smaller values give more precise but fewer results.
284    ///
285    /// Default: 1.0
286    pub max_error: f64,
287
288    /// Stop search when an exact match is found.
289    ///
290    /// When true and an expression matches within the exact match tolerance,
291    /// the search terminates early. Useful when you only need one good solution.
292    ///
293    /// Default: false
294    pub stop_at_exact: bool,
295
296    /// Stop search when error goes below this threshold.
297    ///
298    /// If set, the search will terminate once a match is found with error
299    /// below this value. Set to `Some(1e-12)` for high-precision early stopping.
300    ///
301    /// Default: None
302    pub stop_below: Option<f64>,
303
304    /// Threshold for pruning LHS expressions with near-zero values.
305    ///
306    /// LHS expressions with absolute values below this threshold are pruned
307    /// to prevent flooding results with trivial matches like `cospi(2.5) = 0`.
308    /// Set to 0.0 to disable this filtering.
309    ///
310    /// Default: 1e-4
311    pub zero_value_threshold: f64,
312
313    /// Maximum Newton-Raphson iterations for root refinement.
314    ///
315    /// Controls how many iterations are used to refine candidate solutions.
316    /// Higher values may find more precise roots but take longer.
317    ///
318    /// Default: 15
319    pub newton_iterations: usize,
320
321    /// User-defined constants for evaluation.
322    ///
323    /// Custom constants that can be used in expressions, defined via `-N\'name=value\'`.
324    /// Each constant has a name, value, and optional description.
325    ///
326    /// Default: empty
327    pub user_constants: Vec<UserConstant>,
328
329    /// User-defined functions for evaluation.
330    ///
331    /// Custom functions that can be used in expressions, defined via `-F\'name:formula\'`.
332    /// These extend the built-in functions (sin, cos, etc.).
333    ///
334    /// Default: empty
335    pub user_functions: Vec<crate::udf::UserFunction>,
336
337    /// Argument scale for `sinpi/cospi/tanpi` evaluation.
338    ///
339    /// The default is π, matching original `sinpi(x) = sin(πx)` semantics.
340    /// Override this for alternate trig conventions without relying on global state.
341    pub trig_argument_scale: f64,
342
343    /// Whether to refine candidate roots with Newton-Raphson iteration.
344    ///
345    /// When true, initial coarse matches are refined using Newton-Raphson
346    /// to find more precise solutions. Disable for faster but less precise search.
347    ///
348    /// Default: true
349    pub refine_with_newton: bool,
350
351    /// Optional RHS-only allowed symbol set.
352    ///
353    /// If set, all symbols used on the RHS must be in this set (specified as ASCII bytes).
354    /// This allows restricting RHS expressions to a subset of available symbols.
355    /// Combined with `rhs_excluded_symbols`, both filters apply.
356    ///
357    /// Default: None (all symbols allowed)
358    pub rhs_allowed_symbols: Option<HashSet<u8>>,
359
360    /// Optional RHS-only excluded symbol set.
361    ///
362    /// If set, RHS expressions using any of these symbols are rejected.
363    /// This allows excluding specific symbols from RHS expressions.
364    /// Combined with `rhs_allowed_symbols`, both filters apply.
365    ///
366    /// Default: None (no symbols excluded)
367    pub rhs_excluded_symbols: Option<HashSet<u8>>,
368
369    /// Show Newton-Raphson iteration diagnostic output.
370    ///
371    /// When true, prints detailed information about each Newton-Raphson iteration.
372    /// Enabled by `-Dn` command-line flag. Useful for debugging convergence issues.
373    ///
374    /// Default: false
375    pub show_newton: bool,
376
377    /// Show match check diagnostic output.
378    ///
379    /// When true, prints information about each candidate match evaluation.
380    /// Enabled by `-Do` command-line flag.
381    ///
382    /// Default: false
383    pub show_match_checks: bool,
384
385    /// Show pruned arithmetic diagnostic output.
386    ///
387    /// When true, prints information about arithmetic expressions that were pruned.
388    /// Enabled by `-DA` command-line flag.
389    ///
390    /// Default: false
391    #[allow(dead_code)]
392    pub show_pruned_arith: bool,
393
394    /// Show pruned range/zero diagnostic output.
395    ///
396    /// When true, prints information about expressions pruned due to range
397    /// constraints or near-zero values. Enabled by `-DB` command-line flag.
398    ///
399    /// Default: false
400    pub show_pruned_range: bool,
401
402    /// Show database adds diagnostic output.
403    ///
404    /// When true, prints information about expressions added to the database.
405    /// Enabled by `-DG` command-line flag.
406    ///
407    /// Default: false
408    pub show_db_adds: bool,
409
410    /// When true, matches must match all significant digits of the target.
411    ///
412    /// When enabled, the tolerance is computed based on the magnitude of the
413    /// target value to require full precision matching. The actual tolerance
414    /// is computed in `main.rs` and passed as `max_error`.
415    ///
416    /// Default: false
417    #[allow(dead_code)]
418    pub match_all_digits: bool,
419
420    /// Threshold below which derivative is considered degenerate in Newton-Raphson.
421    ///
422    /// If `|derivative| < derivative_margin` during Newton-Raphson iteration,
423    /// the refinement is skipped to avoid numerical instability and division
424    /// by near-zero values.
425    ///
426    /// Default: 1e-12 (from `DEGENERATE_DERIVATIVE` constant)
427    pub derivative_margin: f64,
428
429    /// Ranking mode for pool ordering and eviction.
430    ///
431    /// Determines how matches are ranked in the result pool:
432    /// - `Complexity`: Sort by expression complexity (simpler is better)
433    /// - `Error`: Sort by match error (more precise is better)
434    ///
435    /// Default: `RankingMode::Complexity`
436    pub ranking_mode: RankingMode,
437}
438
439impl Default for SearchConfig {
440    fn default() -> Self {
441        Self {
442            target: 0.0,
443            max_matches: 100,
444            max_error: 1.0,
445            stop_at_exact: false,
446            stop_below: None,
447            zero_value_threshold: 1e-4,
448            newton_iterations: 15,
449            user_constants: Vec::new(),
450            user_functions: Vec::new(),
451            trig_argument_scale: crate::eval::DEFAULT_TRIG_ARGUMENT_SCALE,
452            refine_with_newton: true,
453            rhs_allowed_symbols: None,
454            rhs_excluded_symbols: None,
455            show_newton: false,
456            show_match_checks: false,
457            show_pruned_arith: false,
458            show_pruned_range: false,
459            show_db_adds: false,
460            match_all_digits: false,
461            derivative_margin: DEGENERATE_DERIVATIVE,
462            ranking_mode: RankingMode::Complexity,
463        }
464    }
465}
466
467impl SearchConfig {
468    /// Build an explicit search context for this configuration.
469    pub fn context(&self) -> SearchContext<'_> {
470        SearchContext::new(self)
471    }
472
473    #[inline]
474    fn rhs_symbol_allowed(&self, rhs: &crate::expr::Expression) -> bool {
475        let symbols = rhs.symbols();
476
477        if let Some(allowed) = &self.rhs_allowed_symbols {
478            if symbols.iter().any(|s| !allowed.contains(&(*s as u8))) {
479                return false;
480            }
481        }
482
483        if let Some(excluded) = &self.rhs_excluded_symbols {
484            if symbols.iter().any(|s| excluded.contains(&(*s as u8))) {
485                return false;
486            }
487        }
488
489        true
490    }
491}
492
493/// Explicit per-run search context.
494#[derive(Clone, Copy, Debug)]
495pub struct SearchContext<'a> {
496    /// Immutable search configuration for this run.
497    pub config: &'a SearchConfig,
498    /// Evaluation context derived from the search configuration.
499    pub eval: EvalContext<'a>,
500}
501
502impl<'a> SearchContext<'a> {
503    pub fn new(config: &'a SearchConfig) -> Self {
504        Self {
505            config,
506            eval: EvalContext::from_slices(&config.user_constants, &config.user_functions)
507                .with_trig_argument_scale(config.trig_argument_scale),
508        }
509    }
510}
511
512/// Perform a complete search
513///
514/// This function is part of the public API for library consumers who want a simple
515/// search interface without statistics collection.
516#[allow(dead_code)]
517pub fn search(target: f64, gen_config: &crate::gen::GenConfig, max_matches: usize) -> Vec<Match> {
518    let (matches, _stats) = search_with_stats(target, gen_config, max_matches);
519    matches
520}
521
522/// Perform a complete search with statistics collection
523///
524/// This function is part of the public API for library consumers who want
525/// detailed statistics about the search process.
526#[allow(dead_code)]
527pub fn search_with_stats(
528    target: f64,
529    gen_config: &crate::gen::GenConfig,
530    max_matches: usize,
531) -> (Vec<Match>, SearchStats) {
532    search_with_stats_and_options(target, gen_config, max_matches, false, None)
533}
534
535/// Perform a complete search with statistics collection and early exit options
536pub fn search_with_stats_and_options(
537    target: f64,
538    gen_config: &crate::gen::GenConfig,
539    max_matches: usize,
540    stop_at_exact: bool,
541    stop_below: Option<f64>,
542) -> (Vec<Match>, SearchStats) {
543    let config = SearchConfig {
544        target,
545        max_matches,
546        stop_at_exact,
547        stop_below,
548        user_constants: gen_config.user_constants.clone(),
549        user_functions: gen_config.user_functions.clone(),
550        ..Default::default()
551    };
552
553    search_with_stats_and_config(gen_config, &config)
554}
555
556/// Perform a complete search with a fully specified search configuration.
557///
558/// This function includes a safety fallback: if expression generation would
559/// exceed ~2M expressions (which would risk OOM), it automatically switches
560/// to streaming mode to avoid memory exhaustion.
561pub fn search_with_stats_and_config(
562    gen_config: &crate::gen::GenConfig,
563    config: &SearchConfig,
564) -> (Vec<Match>, SearchStats) {
565    use crate::gen::generate_all_with_limit_and_context;
566
567    const MAX_EXPRESSIONS_BEFORE_STREAMING: usize = 2_000_000;
568    let context = SearchContext::new(config);
569
570    let gen_start = SearchTimer::start();
571
572    // Try bounded generation first — if limit exceeded, fall back to streaming
573    if let Some(generated) = generate_all_with_limit_and_context(
574        gen_config,
575        config.target,
576        &context.eval,
577        MAX_EXPRESSIONS_BEFORE_STREAMING,
578    ) {
579        let gen_time = gen_start.elapsed();
580
581        // Build database
582        let mut db = ExprDatabase::new();
583        db.insert_rhs(generated.rhs);
584
585        // Find matches with stats
586        let (matches, mut stats) = db.find_matches_with_stats_and_context(&generated.lhs, &context);
587
588        // Add generation stats
589        stats.gen_time = gen_time;
590        stats.lhs_count = generated.lhs.len();
591        stats.rhs_count = db.rhs_count();
592
593        (matches, stats)
594    } else {
595        // Limit exceeded — fall back to streaming mode which avoids OOM
596        search_streaming_with_config(gen_config, config)
597    }
598}
599
600// =============================================================================
601// ADAPTIVE SEARCH - Iterative LHS/RHS expansion like original RIES
602// =============================================================================
603
604/// Perform an adaptive search that iteratively expands LHS/RHS complexity
605///
606/// This implements the original RIES algorithm where expressions are generated
607/// incrementally, expanding the side (LHS or RHS) that has fewer expressions.
608/// This ensures balanced growth and matches the original's expression counts.
609///
610/// # Algorithm
611///
612/// 1. Start with minimal complexity limits (LHS: 1, RHS: 1)
613/// 2. Generate expressions at current complexity
614/// 3. Track how many expressions were generated for each side
615/// 4. Expand the side with fewer expressions (increase complexity by 1)
616/// 5. Repeat until target expression count is reached
617/// 6. Then perform matching on all generated expressions
618///
619/// # Advantages
620///
621/// - Matches original RIES behavior exactly
622/// - Generates similar number of expressions as original
623/// - Better memory efficiency than generating all at once
624/// - Can leverage parallelization more effectively
625///
626/// # Expression Count Formula
627///
628/// Target expressions = 2000 × 4^(2 + level)
629/// - Level 0: ~32,000 expressions
630/// - Level 1: ~128,000 expressions
631/// - Level 2: ~512,000 expressions
632/// - Level 3: ~2,048,000 expressions
633pub fn search_adaptive(
634    base_config: &crate::gen::GenConfig,
635    search_config: &SearchConfig,
636    level: u32,
637) -> (Vec<Match>, SearchStats) {
638    use crate::expr::EvaluatedExpr;
639    use crate::gen::{quantize_value, LhsKey};
640    use std::collections::HashMap;
641
642    let gen_start = SearchTimer::start();
643    let context = SearchContext::new(search_config);
644    // Use HashMap so dedup keeps the simplest expression, not first-seen.
645    // With parallel generation the arrival order is non-deterministic, so
646    // first-seen could discard a simpler equivalent expression.
647    let mut lhs_map: HashMap<LhsKey, EvaluatedExpr> = HashMap::new();
648    let mut rhs_map: HashMap<i64, EvaluatedExpr> = HashMap::new();
649
650    // For adaptive mode, use the standard complexity formula
651    // See level_to_complexity() for the standard mapping
652    let (std_lhs, std_rhs) = level_to_complexity(level);
653
654    let mut config = base_config.clone();
655    config.max_lhs_complexity = std_lhs.max(base_config.max_lhs_complexity);
656    config.max_rhs_complexity = std_rhs.max(base_config.max_rhs_complexity);
657
658    // Generate all expressions at the calculated complexity
659    // Use parallel generation when available for significantly better performance
660    let generated = {
661        #[cfg(feature = "parallel")]
662        {
663            crate::gen::generate_all_parallel_with_context(
664                &config,
665                search_config.target,
666                &context.eval,
667            )
668        }
669        #[cfg(not(feature = "parallel"))]
670        {
671            crate::gen::generate_all_with_context(&config, search_config.target, &context.eval)
672        }
673    };
674
675    // Deduplicate LHS expressions, keeping the simplest equivalent
676    for expr in generated.lhs {
677        let key = (quantize_value(expr.value), quantize_value(expr.derivative));
678        lhs_map
679            .entry(key)
680            .and_modify(|existing| {
681                if expr.expr.complexity() < existing.expr.complexity() {
682                    *existing = expr.clone();
683                }
684            })
685            .or_insert(expr);
686    }
687
688    // Deduplicate RHS expressions, keeping the simplest equivalent
689    for expr in generated.rhs {
690        let key = quantize_value(expr.value);
691        rhs_map
692            .entry(key)
693            .and_modify(|existing| {
694                if expr.expr.complexity() < existing.expr.complexity() {
695                    *existing = expr.clone();
696                }
697            })
698            .or_insert(expr);
699    }
700
701    let all_lhs: Vec<EvaluatedExpr> = lhs_map.into_values().collect();
702    let all_rhs: Vec<EvaluatedExpr> = rhs_map.into_values().collect();
703
704    let gen_time = gen_start.elapsed();
705
706    // Now perform the actual matching
707    let mut db = ExprDatabase::new();
708    db.insert_rhs(all_rhs);
709
710    let search_start = SearchTimer::start();
711    let (matches, match_stats) = db.find_matches_with_stats_and_context(&all_lhs, &context);
712    let search_time = search_start.elapsed();
713
714    // Combine stats
715    let mut stats = SearchStats::new();
716    stats.gen_time = gen_time;
717    stats.search_time = search_time;
718    stats.lhs_count = all_lhs.len();
719    stats.rhs_count = db.rhs_count();
720    stats.lhs_tested = match_stats.lhs_tested;
721    stats.candidates_tested = match_stats.candidates_tested;
722    stats.newton_calls = match_stats.newton_calls;
723    stats.newton_success = match_stats.newton_success;
724    stats.pool_insertions = match_stats.pool_insertions;
725    stats.pool_rejections_error = match_stats.pool_rejections_error;
726    stats.pool_rejections_dedupe = match_stats.pool_rejections_dedupe;
727    stats.pool_evictions = match_stats.pool_evictions;
728    stats.pool_final_size = match_stats.pool_final_size;
729    stats.pool_best_error = match_stats.pool_best_error;
730    stats.early_exit = match_stats.early_exit;
731
732    (matches, stats)
733}
734
735// =============================================================================
736// STREAMING SEARCH - Memory-efficient search for high complexity levels
737// =============================================================================
738
739/// Perform a streaming search that processes expressions as they're generated
740///
741/// This is the memory-efficient version of search that builds the RHS database
742/// incrementally. Instead of generating ALL expressions into memory first,
743/// it processes RHS expressions immediately and matches LHS expressions as
744/// they arrive.
745///
746/// # Memory Efficiency
747///
748/// - Traditional: O(expressions) memory - all expressions stored
749/// - Streaming: O(database) memory - only RHS database stored, LHS processed on-the-fly
750///
751/// # When to Use
752///
753/// Use streaming search when:
754/// - Complexity levels are high (L4+, where expressions count > 10M)
755/// - Memory is limited
756/// - You want to see results progressively
757///
758/// # Example
759///
760/// ```no_run
761/// use ries_rs::gen::GenConfig;
762/// use ries_rs::search::search_streaming;
763/// let config = GenConfig::default();
764/// let (matches, stats) = search_streaming(2.5, &config, 100, false, None);
765/// ```
766#[allow(dead_code)]
767pub fn search_streaming(
768    target: f64,
769    gen_config: &crate::gen::GenConfig,
770    max_matches: usize,
771    stop_at_exact: bool,
772    stop_below: Option<f64>,
773) -> (Vec<Match>, SearchStats) {
774    let config = SearchConfig {
775        target,
776        max_matches,
777        stop_at_exact,
778        stop_below,
779        user_constants: gen_config.user_constants.clone(),
780        user_functions: gen_config.user_functions.clone(),
781        ..Default::default()
782    };
783
784    search_streaming_with_config(gen_config, &config)
785}
786
787/// Perform a streaming search with a fully specified search configuration.
788pub fn search_streaming_with_config(
789    gen_config: &crate::gen::GenConfig,
790    search_config: &SearchConfig,
791) -> (Vec<Match>, SearchStats) {
792    use crate::gen::{generate_streaming_with_context, StreamingCallbacks};
793    use std::collections::HashMap;
794
795    let gen_start = SearchTimer::start();
796    let mut stats = SearchStats::new();
797    let context = SearchContext::new(search_config);
798
799    // Respect configured max error (with a tiny floor for numerical stability)
800    let initial_max_error = search_config.max_error.max(1e-12);
801
802    // Create bounded pool with configured capacity
803    let mut pool = TopKPool::new_with_diagnostics(
804        search_config.max_matches,
805        initial_max_error,
806        search_config.show_db_adds,
807        search_config.ranking_mode,
808    );
809
810    // Build RHS database first using streaming
811    let mut rhs_db = TieredExprDatabase::new();
812    let mut rhs_map: HashMap<i64, crate::expr::EvaluatedExpr> = HashMap::new();
813
814    // Collect LHS expressions to process later
815    let mut lhs_exprs: Vec<crate::expr::EvaluatedExpr> = Vec::new();
816
817    // First pass: collect all RHS and LHS via streaming
818    {
819        let mut callbacks = StreamingCallbacks {
820            on_rhs: &mut |expr| {
821                // Deduplicate by value, keeping simplest.
822                // Use quantize_value (not inline * 1e8) to avoid i64 overflow for large values.
823                let key = crate::gen::quantize_value(expr.value);
824                rhs_map
825                    .entry(key)
826                    .and_modify(|existing| {
827                        if expr.expr.complexity() < existing.expr.complexity() {
828                            *existing = expr.clone();
829                        }
830                    })
831                    .or_insert_with(|| expr.clone());
832                true
833            },
834            on_lhs: &mut |expr| {
835                lhs_exprs.push(expr.clone());
836                true
837            },
838        };
839
840        // Generate with streaming callbacks
841        generate_streaming_with_context(
842            gen_config,
843            search_config.target,
844            &context.eval,
845            &mut callbacks,
846        );
847    }
848
849    // Build the tiered database from deduplicated RHS
850    for expr in rhs_map.into_values() {
851        rhs_db.insert(expr);
852    }
853    rhs_db.finalize();
854
855    stats.rhs_count = rhs_db.total_count();
856    stats.lhs_count = lhs_exprs.len();
857    stats.gen_time = gen_start.elapsed();
858
859    // Now match LHS expressions against the RHS database
860    let search_start = SearchTimer::start();
861
862    // Sort LHS by complexity so simpler expressions are processed first
863    lhs_exprs.sort_by_key(|e| e.expr.complexity());
864
865    // Early exit tracking
866    let mut early_exit = false;
867
868    'outer: for lhs in &lhs_exprs {
869        if early_exit {
870            break;
871        }
872
873        // Skip LHS with value too close to 0
874        if lhs.value.abs() < search_config.zero_value_threshold {
875            if search_config.show_pruned_range {
876                eprintln!(
877                    "  [pruned range] value={:.6e} reason=\"near-zero\" expr=\"{}\"",
878                    lhs.value,
879                    lhs.expr.to_infix()
880                );
881            }
882            continue;
883        }
884
885        // Skip degenerate expressions
886        if lhs.derivative.abs() < DEGENERATE_TEST_THRESHOLD {
887            let test_x = search_config.target + std::f64::consts::E;
888            // Use the full evaluator (including user_functions) so that UDF-containing
889            // expressions are not silently skipped due to evaluate_with_constants
890            // returning Err for user-function symbols.
891            if let Ok(test_result) =
892                crate::eval::evaluate_fast_with_context(&lhs.expr, test_x, &context.eval)
893            {
894                let value_unchanged =
895                    (test_result.value - lhs.value).abs() < DEGENERATE_TEST_THRESHOLD;
896                let deriv_still_zero = test_result.derivative.abs() < DEGENERATE_TEST_THRESHOLD;
897                if deriv_still_zero || value_unchanged {
898                    continue;
899                }
900            }
901
902            // Check for value match
903            stats.lhs_tested += 1;
904            for rhs in rhs_db.iter_tiers_in_range(
905                lhs.value - DEGENERATE_RANGE_TOLERANCE,
906                lhs.value + DEGENERATE_RANGE_TOLERANCE,
907            ) {
908                if !search_config.rhs_symbol_allowed(&rhs.expr) {
909                    continue;
910                }
911                stats.candidates_tested += 1;
912                if search_config.show_match_checks {
913                    eprintln!(
914                        "  [match] checking lhs={:.6} rhs={:.6}",
915                        lhs.value, rhs.value
916                    );
917                }
918                let val_diff = (lhs.value - rhs.value).abs();
919                if val_diff < DEGENERATE_RANGE_TOLERANCE && pool.would_accept(0.0, true) {
920                    let m = Match {
921                        lhs: lhs.clone(),
922                        rhs: rhs.clone(),
923                        x_value: search_config.target,
924                        error: 0.0,
925                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
926                    };
927                    pool.try_insert(m);
928                }
929            }
930            continue;
931        }
932
933        stats.lhs_tested += 1;
934
935        // Use adaptive search radius
936        let search_radius = calculate_adaptive_search_radius(
937            lhs.derivative,
938            lhs.expr.complexity(),
939            pool.len(),
940            search_config.max_matches,
941            pool.best_error,
942        );
943        let low = lhs.value - search_radius;
944        let high = lhs.value + search_radius;
945
946        // Search using tiered iterator (lower tiers first)
947        for rhs in rhs_db.iter_tiers_in_range(low, high) {
948            if !search_config.rhs_symbol_allowed(&rhs.expr) {
949                continue;
950            }
951            stats.candidates_tested += 1;
952            if search_config.show_match_checks {
953                eprintln!(
954                    "  [match] checking lhs={:.6} rhs={:.6}",
955                    lhs.value, rhs.value
956                );
957            }
958
959            // Compute initial error estimate (coarse filter)
960            let val_diff = lhs.value - rhs.value;
961            let x_delta = -val_diff / lhs.derivative;
962            let coarse_error = x_delta.abs();
963
964            // Skip if coarse estimate won't pass threshold
965            let is_potentially_exact = coarse_error < NEWTON_FINAL_TOLERANCE;
966            if !pool.would_accept_strict(coarse_error, is_potentially_exact) {
967                continue;
968            }
969
970            if !search_config.refine_with_newton {
971                let refined_x = search_config.target + x_delta;
972                let refined_error = x_delta;
973                let is_exact = refined_error.abs() < EXACT_MATCH_TOLERANCE;
974
975                if pool.would_accept(refined_error.abs(), is_exact) {
976                    let m = Match {
977                        lhs: lhs.clone(),
978                        rhs: rhs.clone(),
979                        x_value: refined_x,
980                        error: refined_error,
981                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
982                    };
983
984                    pool.try_insert(m);
985
986                    if search_config.stop_at_exact && is_exact {
987                        early_exit = true;
988                        break 'outer;
989                    }
990                    if let Some(threshold) = search_config.stop_below {
991                        if refined_error.abs() < threshold {
992                            early_exit = true;
993                            break 'outer;
994                        }
995                    }
996                }
997                continue;
998            }
999
1000            // Refine with Newton-Raphson
1001            stats.newton_calls += 1;
1002            if let Some(refined_x) = newton_raphson_with_constants(
1003                &lhs.expr,
1004                rhs.value,
1005                search_config.target,
1006                search_config.newton_iterations,
1007                &context.eval,
1008                search_config.show_newton,
1009                search_config.derivative_margin,
1010            ) {
1011                stats.newton_success += 1;
1012                let refined_error = refined_x - search_config.target;
1013                let is_exact = refined_error.abs() < EXACT_MATCH_TOLERANCE;
1014
1015                // Check if this is acceptable
1016                if pool.would_accept(refined_error.abs(), is_exact) {
1017                    let m = Match {
1018                        lhs: lhs.clone(),
1019                        rhs: rhs.clone(),
1020                        x_value: refined_x,
1021                        error: refined_error,
1022                        complexity: lhs.expr.complexity() + rhs.expr.complexity(),
1023                    };
1024
1025                    // Insert into pool
1026                    pool.try_insert(m);
1027
1028                    // Check early exit conditions
1029                    if search_config.stop_at_exact && is_exact {
1030                        early_exit = true;
1031                        break 'outer;
1032                    }
1033                    if let Some(threshold) = search_config.stop_below {
1034                        if refined_error.abs() < threshold {
1035                            early_exit = true;
1036                            break 'outer;
1037                        }
1038                    }
1039                }
1040            }
1041        }
1042    }
1043
1044    // Collect pool stats
1045    stats.pool_insertions = pool.stats.insertions;
1046    stats.pool_rejections_error = pool.stats.rejections_error;
1047    stats.pool_rejections_dedupe = pool.stats.rejections_dedupe;
1048    stats.pool_evictions = pool.stats.evictions;
1049    stats.pool_final_size = pool.len();
1050    stats.pool_best_error = pool.best_error;
1051    stats.search_time = search_start.elapsed();
1052    stats.early_exit = early_exit;
1053
1054    (pool.into_sorted(), stats)
1055}
1056
1057/// Perform one-sided search: generate RHS expressions only and match `x = RHS`.
1058pub fn search_one_sided_with_stats_and_config(
1059    gen_config: &crate::gen::GenConfig,
1060    config: &SearchConfig,
1061) -> (Vec<Match>, SearchStats) {
1062    use crate::eval::evaluate_with_context;
1063    use crate::expr::Expression;
1064    use crate::gen::generate_all_with_context;
1065    use crate::symbol::Symbol;
1066
1067    let gen_start = SearchTimer::start();
1068    let context = SearchContext::new(config);
1069
1070    let mut rhs_only = gen_config.clone();
1071    rhs_only.generate_lhs = false;
1072    rhs_only.generate_rhs = true;
1073
1074    let generated = generate_all_with_context(&rhs_only, config.target, &context.eval);
1075    let gen_time = gen_start.elapsed();
1076
1077    let search_start = SearchTimer::start();
1078    let initial_max_error = config.max_error.max(1e-12);
1079    let mut pool = TopKPool::new_with_diagnostics(
1080        config.max_matches,
1081        initial_max_error,
1082        config.show_db_adds,
1083        config.ranking_mode,
1084    );
1085    let mut stats = SearchStats::new();
1086    let mut early_exit = false;
1087
1088    let mut lhs_expr = Expression::new();
1089    lhs_expr.push_with_table(Symbol::X, &gen_config.symbol_table);
1090    let lhs_eval = evaluate_with_context(&lhs_expr, config.target, &context.eval);
1091    let lhs_eval = match lhs_eval {
1092        Ok(v) => v,
1093        Err(_) => {
1094            stats.gen_time = gen_time;
1095            stats.search_time = search_start.elapsed();
1096            return (Vec::new(), stats);
1097        }
1098    };
1099    let lhs = EvaluatedExpr::new(
1100        lhs_expr,
1101        lhs_eval.value,
1102        lhs_eval.derivative,
1103        lhs_eval.num_type,
1104    );
1105
1106    stats.lhs_count = 1;
1107    stats.rhs_count = generated.rhs.len();
1108    stats.lhs_tested = 1;
1109
1110    for rhs in generated.rhs {
1111        if !config.rhs_symbol_allowed(&rhs.expr) {
1112            continue;
1113        }
1114        stats.candidates_tested += 1;
1115        if config.show_match_checks {
1116            eprintln!(
1117                "  [match] checking lhs={:.6} rhs={:.6}",
1118                lhs.value, rhs.value
1119            );
1120        }
1121
1122        let error = rhs.value - config.target;
1123        let is_exact = error.abs() < EXACT_MATCH_TOLERANCE;
1124        if !pool.would_accept(error.abs(), is_exact) {
1125            continue;
1126        }
1127
1128        let m = Match {
1129            lhs: lhs.clone(),
1130            rhs: rhs.clone(),
1131            x_value: rhs.value,
1132            error,
1133            complexity: lhs.expr.complexity() + rhs.expr.complexity(),
1134        };
1135
1136        pool.try_insert(m);
1137
1138        if config.stop_at_exact && is_exact {
1139            early_exit = true;
1140            break;
1141        }
1142        if let Some(threshold) = config.stop_below {
1143            if error.abs() < threshold {
1144                early_exit = true;
1145                break;
1146            }
1147        }
1148    }
1149
1150    stats.pool_insertions = pool.stats.insertions;
1151    stats.pool_rejections_error = pool.stats.rejections_error;
1152    stats.pool_rejections_dedupe = pool.stats.rejections_dedupe;
1153    stats.pool_evictions = pool.stats.evictions;
1154    stats.pool_final_size = pool.len();
1155    stats.pool_best_error = pool.best_error;
1156    stats.gen_time = gen_time;
1157    stats.search_time = search_start.elapsed();
1158    stats.early_exit = early_exit;
1159
1160    (pool.into_sorted(), stats)
1161}
1162
1163/// Perform a parallel search using Rayon
1164///
1165/// This function is part of the public API for library consumers who want
1166/// parallel search without statistics collection.
1167#[cfg(feature = "parallel")]
1168#[allow(dead_code)]
1169pub fn search_parallel(
1170    target: f64,
1171    gen_config: &crate::gen::GenConfig,
1172    max_matches: usize,
1173) -> Vec<Match> {
1174    let (matches, _stats) = search_parallel_with_stats(target, gen_config, max_matches);
1175    matches
1176}
1177
1178/// Perform a parallel search with statistics collection
1179///
1180/// This function is part of the public API for library consumers who want
1181/// detailed statistics about the parallel search process.
1182#[cfg(feature = "parallel")]
1183#[allow(dead_code)]
1184pub fn search_parallel_with_stats(
1185    target: f64,
1186    gen_config: &crate::gen::GenConfig,
1187    max_matches: usize,
1188) -> (Vec<Match>, SearchStats) {
1189    search_parallel_with_stats_and_options(target, gen_config, max_matches, false, None)
1190}
1191
1192/// Perform a parallel search with statistics collection and early exit options
1193#[cfg(feature = "parallel")]
1194pub fn search_parallel_with_stats_and_options(
1195    target: f64,
1196    gen_config: &crate::gen::GenConfig,
1197    max_matches: usize,
1198    stop_at_exact: bool,
1199    stop_below: Option<f64>,
1200) -> (Vec<Match>, SearchStats) {
1201    let config = SearchConfig {
1202        target,
1203        max_matches,
1204        stop_at_exact,
1205        stop_below,
1206        user_constants: gen_config.user_constants.clone(),
1207        user_functions: gen_config.user_functions.clone(),
1208        ..Default::default()
1209    };
1210
1211    search_parallel_with_stats_and_config(gen_config, &config)
1212}
1213
1214/// Perform a parallel search with a fully specified search configuration.
1215///
1216/// This function includes a safety fallback: if expression generation would
1217/// exceed ~2M expressions (which would risk OOM), it automatically switches
1218/// to streaming mode to avoid memory exhaustion.
1219#[cfg(feature = "parallel")]
1220pub fn search_parallel_with_stats_and_config(
1221    gen_config: &crate::gen::GenConfig,
1222    config: &SearchConfig,
1223) -> (Vec<Match>, SearchStats) {
1224    use crate::gen::generate_all_with_limit_and_context;
1225
1226    const MAX_EXPRESSIONS_BEFORE_STREAMING: usize = 2_000_000;
1227    let context = SearchContext::new(config);
1228
1229    let gen_start = SearchTimer::start();
1230
1231    // Try bounded generation first — if limit exceeded, fall back to streaming
1232    if let Some(generated) = generate_all_with_limit_and_context(
1233        gen_config,
1234        config.target,
1235        &context.eval,
1236        MAX_EXPRESSIONS_BEFORE_STREAMING,
1237    ) {
1238        let gen_time = gen_start.elapsed();
1239
1240        // Build database
1241        let mut db = ExprDatabase::new();
1242        db.insert_rhs(generated.rhs);
1243
1244        // Find matches with stats
1245        let (matches, mut stats) = db.find_matches_with_stats_and_context(&generated.lhs, &context);
1246
1247        // Add generation stats
1248        stats.gen_time = gen_time;
1249        stats.lhs_count = generated.lhs.len();
1250        stats.rhs_count = db.rhs_count();
1251
1252        (matches, stats)
1253    } else {
1254        // Limit exceeded — fall back to streaming mode which avoids OOM
1255        search_streaming_with_config(gen_config, config)
1256    }
1257}
ries_rs/search.rs

ries_rs/
search.rs