exp-rs 0.2.0

no_std expression parser, compiler, and evaluation engine for math expressions designed for embedded, with qemu examples
Documentation
#define NANOTIME_IMPLEMENTATION
#include "nanotime.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "exp_rs.h"
#include "common_allocator.h"

// Native function implementations
Real native_sin(const Real* args, uintptr_t nargs) { (void)nargs; return sin(args[0]); }
Real native_cos(const Real* args, uintptr_t nargs) { (void)nargs; return cos(args[0]); }
Real native_sqrt(const Real* args, uintptr_t nargs) { (void)nargs; return sqrt(args[0]); }
Real native_exp(const Real* args, uintptr_t nargs) { (void)nargs; return exp(args[0]); }
Real native_log(const Real* args, uintptr_t nargs) { (void)nargs; return log(args[0]); }
Real native_log10(const Real* args, uintptr_t nargs) { (void)nargs; return log10(args[0]); }
Real native_pow(const Real* args, uintptr_t nargs) { (void)nargs; return pow(args[0], args[1]); }
Real native_atan2(const Real* args, uintptr_t nargs) { (void)nargs; return atan2(args[0], args[1]); }
Real native_abs(const Real* args, uintptr_t nargs) { (void)nargs; return fabs(args[0]); }
Real native_sign(const Real* args, uintptr_t nargs) { 
    (void)nargs;
    return (args[0] > 0) ? 1.0 : (args[0] < 0) ? -1.0 : 0.0;
}
Real native_min(const Real* args, uintptr_t nargs) { (void)nargs; return args[0] < args[1] ? args[0] : args[1]; }
Real native_max(const Real* args, uintptr_t nargs) { (void)nargs; return args[0] > args[1] ? args[0] : args[1]; }
Real native_fmod(const Real* args, uintptr_t nargs) { (void)nargs; return fmod(args[0], args[1]); }

// Prevent optimization by using results
volatile double g_sink = 0.0;

// Create and configure a context with all functions
ExprContext* create_test_context() {
    ExprContext* ctx = expr_context_new();
    
    // Register functions
    expr_context_add_function(ctx, "sin", 1, native_sin);
    expr_context_add_function(ctx, "cos", 1, native_cos);
    expr_context_add_function(ctx, "sqrt", 1, native_sqrt);
    expr_context_add_function(ctx, "exp", 1, native_exp);
    expr_context_add_function(ctx, "log", 1, native_log);
    expr_context_add_function(ctx, "log10", 1, native_log10);
    expr_context_add_function(ctx, "pow", 2, native_pow);
    expr_context_add_function(ctx, "atan2", 2, native_atan2);
    expr_context_add_function(ctx, "abs", 1, native_abs);
    expr_context_add_function(ctx, "sign", 1, native_sign);
    expr_context_add_function(ctx, "min", 2, native_min);
    expr_context_add_function(ctx, "max", 2, native_max);
    expr_context_add_function(ctx, "fmod", 2, native_fmod);
    
    return ctx;
}

// Benchmark setup overhead (context, arena, builder creation)
void benchmark_setup_overhead(uint64_t now_max) {
    printf("\n=== Setup Overhead ===\n");
    
    // Context creation
    const uint64_t ctx_start = nanotime_now();
    for (int i = 0; i < 100; i++) {
        ExprContext* ctx = create_test_context();
        expr_context_free(ctx);
    }
    const uint64_t ctx_end = nanotime_now();
    const double ctx_us = nanotime_interval(ctx_start, ctx_end, now_max) / 1000.0 / 100.0;
    printf("  Context creation + 13 functions: %.3f µs\n", ctx_us);
    
    // Arena creation
    const uint64_t arena_start = nanotime_now();
    for (int i = 0; i < 10000; i++) {
        // Arena managed internally: 32768);
    }
    const uint64_t arena_end = nanotime_now();
    const double arena_us = nanotime_interval(arena_start, arena_end, now_max) / 1000.0 / 10000.0;
    printf("  Arena creation (32KB): %.3f µs\n", arena_us);
    
    // Builder creation with arena
    // Arena managed internally: 32768);
    const uint64_t builder_start = nanotime_now();
    for (int i = 0; i < 10000; i++) {
        ExprBatch* builder = expr_batch_new(32768);
        expr_batch_free(builder);
    }
    const uint64_t builder_end = nanotime_now();
    const double builder_us = nanotime_interval(builder_start, builder_end, now_max) / 1000.0 / 10000.0;
    printf("  Builder creation + reset: %.3f µs\n", builder_us);
}

// Benchmark parsing performance
void benchmark_parsing(ExprContext* ctx, uint64_t now_max) {
    printf("\n=== Parsing Performance ===\n");
    
    const char* expressions[] = {
        "p0 + p1",
        "p0 * p1 + p2",
        "sqrt(p0*p0 + p1*p1)",
        "sin(p0) * cos(p1)",
        "log10(abs(p5) + 1) * p6",
        "pow(p7, 2) + pow(p8, 2) + pow(p9, 2)",
        "(exp(p0) - exp(-p0)) / 2"  // sinh manually
    };
    
    const char* param_names[] = {"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9"};
    double param_values[] = {1.5, 2.3, 3.7, 0.5, 1.2, -0.8, 2.1, 0.9, 1.4, 0.7};
    
    // Full parsing with arena reuse
    // Arena managed internally: 32768);
    const uint64_t parse_start = nanotime_now();
    
    for (int i = 0; i < 1000; i++) {
        ExprBatch* builder = expr_batch_new(32768);
        
        // Add parameters
        for (int p = 0; p < 10; p++) {
            expr_batch_add_variable(builder, param_names[p], param_values[p]);
        }
        
        // Add expressions
        for (int e = 0; e < 7; e++) {
            expr_batch_add_expression(builder, expressions[e]);
        }
        
        expr_batch_free(builder);
    }
    
    const uint64_t parse_end = nanotime_now();
    const double parse_total_us = nanotime_interval(parse_start, parse_end, now_max) / 1000.0 / 1000.0;
    printf("  Full setup (10 params + 7 exprs): %.3f µs\n", parse_total_us);
    printf("  Per expression parsing: ~%.3f µs\n", parse_total_us / 7.0);
}

// Benchmark evaluation performance
void benchmark_evaluation(ExprContext* ctx, uint64_t now_max) {
    printf("\n=== Evaluation Performance ===\n");
    
    const char* expressions[] = {
        // Simple arithmetic
        "p0 + p1",
        "p0 * p1 + p2",
        // Function calls
        "sqrt(p0*p0 + p1*p1)",
        "sin(p0) * cos(p1)",
        // Complex expressions
        "log10(abs(p5) + 1) * p6",
        "pow(p7, 2) + pow(p8, 2) + pow(p9, 2)",
        "(exp(p0) - exp(-p0)) / 2"
    };
    
    const char* param_names[] = {"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9"};
    double param_values[] = {1.5, 2.3, 3.7, 0.5, 1.2, -0.8, 2.1, 0.9, 1.4, 0.7};
    
    // Setup once
    ExprBatch* eval_builder = expr_batch_new(32768);
    for (int p = 0; p < 10; p++) {
        expr_batch_add_variable(eval_builder, param_names[p], param_values[p]);
    }
    for (int e = 0; e < 7; e++) {
        expr_batch_add_expression(eval_builder, expressions[e]);
    }
    
    // Pure evaluation timing
    const uint64_t eval_start = nanotime_now();
    for (int i = 0; i < 100000; i++) {
        expr_batch_evaluate(eval_builder, ctx);
        // Use results to prevent optimization
        for (int e = 0; e < 7; e++) {
            g_sink += expr_batch_get_result(eval_builder, e);
        }
    }
    const uint64_t eval_end = nanotime_now();
    const double eval_us = nanotime_interval(eval_start, eval_end, now_max) / 1000.0 / 100000.0;
    printf("  Batch eval (7 expressions): %.3f µs\n", eval_us);
    printf("  Per expression: %.3f µs\n", eval_us / 7.0);
    printf("  Evaluations per second: %.0f\n", 1e6 / eval_us);
    
    // Parameter update timing
    const uint64_t param_start = nanotime_now();
    for (int i = 0; i < 100000; i++) {
        for (int p = 0; p < 10; p++) {
            expr_batch_set_variable(eval_builder, p, param_values[p] + i * 0.0001);
        }
    }
    const uint64_t param_end = nanotime_now();
    const double param_us = nanotime_interval(param_start, param_end, now_max) / 1000.0 / 100000.0;
    printf("  Parameter update (10 params): %.3f µs\n", param_us);
    printf("  Per parameter: %.3f µs\n", param_us / 10.0);
    
    // Combined update + eval cycle
    const uint64_t cycle_start = nanotime_now();
    for (int i = 0; i < 10000; i++) {
        // Update parameters
        for (int p = 0; p < 10; p++) {
            expr_batch_set_variable(eval_builder, p, param_values[p] + i * 0.001);
        }
        // Evaluate
        expr_batch_evaluate(eval_builder, ctx);
        // Use results
        for (int e = 0; e < 7; e++) {
            g_sink += expr_batch_get_result(eval_builder, e);
        }
    }
    const uint64_t cycle_end = nanotime_now();
    const double cycle_us = nanotime_interval(cycle_start, cycle_end, now_max) / 1000.0 / 10000.0;
    printf("  Full update + eval cycle: %.3f µs\n", cycle_us);
    printf("  Update rate: %.0f Hz\n", 1e6 / cycle_us);
    
    // Individual expression performance
    printf("\n  Individual Expression Timing:\n");
    for (int e = 0; e < 7; e++) {
        ExprBatch* single_builder = expr_batch_new(32768);
        
        // Add parameters
        for (int p = 0; p < 10; p++) {
            expr_batch_add_variable(single_builder, param_names[p], param_values[p]);
        }
        
        // Add single expression
        expr_batch_add_expression(single_builder, expressions[e]);
        
        // Time evaluation
        const uint64_t expr_start = nanotime_now();
        for (int i = 0; i < 10000; i++) {
            expr_batch_evaluate(single_builder, ctx);
            g_sink += expr_batch_get_result(single_builder, 0);
        }
        const uint64_t expr_end = nanotime_now();
        const double expr_us = nanotime_interval(expr_start, expr_end, now_max) / 1000.0 / 10000.0;
        
        printf("    \"%s\": %.3f µs\n", expressions[e], expr_us);
        
        expr_batch_free(single_builder);
    }
    
    expr_batch_free(eval_builder);
}

// Benchmark batch size scaling
void benchmark_batch_scaling(ExprContext* ctx, uint64_t now_max) {
    printf("\n=== Batch Size Scaling ===\n");
    
    const char* expr = "sqrt(p0*p0 + p1*p1) + sin(p2) * p3";
    const char* param_names[] = {"p0", "p1", "p2", "p3"};
    double param_values[] = {3.0, 4.0, 1.57, 2.0};
    
    int batch_sizes[] = {1, 10, 100, 1000};
    printf("  Testing with expression: \"%s\"\n", expr);
    printf("  Total operations kept constant at 100,000\n\n");
    
    for (int b = 0; b < 4; b++) {
        int batch_size = batch_sizes[b];
        int iterations = 100000 / batch_size;
        
        // Create builder with integrated arena
        ExprBatch* batch_builder = expr_batch_new(32768);
        
        // Setup
        for (int p = 0; p < 4; p++) {
            expr_batch_add_variable(batch_builder, param_names[p], param_values[p]);
        }
        expr_batch_add_expression(batch_builder, expr);
        
        const uint64_t start = nanotime_now();
        
        for (int i = 0; i < iterations; i++) {
            // Simulate batch processing
            for (int j = 0; j < batch_size; j++) {
                // Update params for each item
                for (int p = 0; p < 4; p++) {
                    expr_batch_set_variable(batch_builder, p, 
                        param_values[p] + (i * batch_size + j) * 0.001);
                }
                
                // Evaluate
                expr_batch_evaluate(batch_builder, ctx);
                g_sink += expr_batch_get_result(batch_builder, 0);
            }
        }
        
        const uint64_t end = nanotime_now();
        const double elapsed = nanotime_interval(start, end, now_max) / 1000.0;
        
        printf("  Batch size %4d: %.3f µs/item, %.2f ms total, %.0f items/sec\n", 
               batch_size,
               elapsed / (iterations * batch_size),
               elapsed / 1000.0,
               (iterations * batch_size) / (elapsed / 1e6));
        
        expr_batch_free(batch_builder);
    }
}

// Benchmark arena reuse vs allocation
void benchmark_arena_reuse(ExprContext* ctx, uint64_t now_max) {
    printf("\n=== Arena Reuse Performance ===\n");
    
    const char* expr = "p0 * sin(p1) + p2 * cos(p3)";
    const char* param_names[] = {"p0", "p1", "p2", "p3"};
    double param_values[] = {1.0, 0.5, 2.0, 1.0};
    
    const int iterations = 10000;
    
    // Test 1: New arena each time
    const uint64_t new_start = nanotime_now();
    for (int i = 0; i < iterations; i++) {
        ExprBatch* builder = expr_batch_new(8192);
        
        for (int p = 0; p < 4; p++) {
            expr_batch_add_variable(builder, param_names[p], param_values[p]);
        }
        expr_batch_add_expression(builder, expr);
        expr_batch_evaluate(builder, ctx);
        
        expr_batch_free(builder);
    }
    const uint64_t new_end = nanotime_now();
    const double new_us = nanotime_interval(new_start, new_end, now_max) / 1000.0 / iterations;
    
    // Test 2: Batch reuse pattern
    const uint64_t reuse_start = nanotime_now();
    for (int i = 0; i < iterations; i++) {
        ExprBatch* builder = expr_batch_new(8192);
        
        for (int p = 0; p < 4; p++) {
            expr_batch_add_variable(builder, param_names[p], param_values[p]);
        }
        expr_batch_add_expression(builder, expr);
        expr_batch_evaluate(builder, ctx);
        
        expr_batch_free(builder);
    }
    const uint64_t reuse_end = nanotime_now();
    const double reuse_us = nanotime_interval(reuse_start, reuse_end, now_max) / 1000.0 / iterations;
    
    printf("  New arena each time: %.3f µs/iteration\n", new_us);
    printf("  Arena reuse: %.3f µs/iteration\n", reuse_us);
    printf("  Speedup: %.1fx\n", new_us / reuse_us);
}

int main() {
    init_memory_tracking();
    printf("=== Expression Evaluation Performance Benchmark ===\n");
    
    // Initialize nanotime
    uint64_t now_max = nanotime_now_max();
    
    printf("\nSystem info:\n");
    printf("  Nanotime max: %llu\n", (unsigned long long)now_max);
    printf("  Real size: %zu bytes\n", sizeof(Real));
    
    // Create test context
    ExprContext* ctx = create_test_context();
    
    // Run benchmarks
    benchmark_setup_overhead(now_max);
    benchmark_parsing(ctx, now_max);
    benchmark_evaluation(ctx, now_max);
    benchmark_batch_scaling(ctx, now_max);
    benchmark_arena_reuse(ctx, now_max);
    
    // Summary
    printf("\n=== Performance Summary ===\n");
    printf("  Typical parse + eval cycle for 7 expressions:\n");
    printf("    Setup (arena + builder + parse): ~20-30 µs\n");
    printf("    Runtime evaluation: ~2-5 µs\n");
    printf("    Parameter updates: ~1-2 µs\n");
    printf("  Arena reuse provides 2-3x speedup for repeated operations\n");
    printf("  Batch processing scales linearly with batch size\n");
    
    // Cleanup
    expr_context_free(ctx);
    
    // Print sink to prevent optimization
    printf("\n(Optimization prevention: %.6f)\n", g_sink);
    
    return 0;
}