#define NANOTIME_IMPLEMENTATION
#include "nanotime.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <assert.h>
#include "exp_rs.h"
#include "common_allocator.h"
/* Native callback for "sin": returns the sine of args[0]. nargs is ignored. */
Real native_sin(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real angle = args[0];
    return sin(angle);
}
/* Native callback for "cos": returns the cosine of args[0]. nargs is ignored. */
Real native_cos(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real angle = args[0];
    return cos(angle);
}
/* Native callback for "sqrt": returns the square root of args[0]. nargs is ignored. */
Real native_sqrt(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real value = args[0];
    return sqrt(value);
}
/* Native callback for "exp": returns e raised to args[0]. nargs is ignored. */
Real native_exp(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real exponent = args[0];
    return exp(exponent);
}
/* Native callback for "log": returns the natural logarithm of args[0]. nargs is ignored. */
Real native_log(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real value = args[0];
    return log(value);
}
/* Native callback for "log10": returns the base-10 logarithm of args[0]. nargs is ignored. */
Real native_log10(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real value = args[0];
    return log10(value);
}
/*
 * Native callback for "pow": returns args[0] raised to the power args[1].
 * nargs is ignored; both elements are read unconditionally.
 */
Real native_pow(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real base = args[0];
    const Real exponent = args[1];
    return pow(base, exponent);
}
/*
 * Native callback for "atan2": returns atan2(args[0], args[1]), i.e. args[0]
 * is the y operand and args[1] the x operand. nargs is ignored; both elements
 * are read unconditionally.
 */
Real native_atan2(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real y = args[0];
    const Real x = args[1];
    return atan2(y, x);
}
/* Native callback for "abs": returns the absolute value of args[0]. nargs is ignored. */
Real native_abs(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real value = args[0];
    return fabs(value);
}
/*
 * Native callback for "sign": returns 1.0 when args[0] is positive, -1.0 when
 * it is negative, and 0.0 otherwise (which includes zero and any value for
 * which both comparisons are false, such as NaN). nargs is ignored.
 */
Real native_sign(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    if (args[0] > 0) {
        return 1.0;
    }
    if (args[0] < 0) {
        return -1.0;
    }
    return 0.0;
}
/*
 * Native callback for "min": returns args[0] when it compares strictly less
 * than args[1], otherwise args[1]. nargs is ignored; both elements are read
 * unconditionally.
 */
Real native_min(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    if (args[0] < args[1]) {
        return args[0];
    }
    return args[1];
}
/*
 * Native callback for "max": returns args[0] when it compares strictly greater
 * than args[1], otherwise args[1]. nargs is ignored; both elements are read
 * unconditionally.
 */
Real native_max(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    if (args[0] > args[1]) {
        return args[0];
    }
    return args[1];
}
/*
 * Native callback for "fmod": returns the floating-point remainder of
 * args[0] / args[1] as computed by fmod(). nargs is ignored; both elements
 * are read unconditionally.
 */
Real native_fmod(const Real* args, uintptr_t nargs)
{
    (void)nargs;
    const Real dividend = args[0];
    const Real divisor = args[1];
    return fmod(dividend, divisor);
}
/*
 * Running total of evaluation results. The benchmarks add every fetched
 * result to it and main() prints it at the end, so the results are always
 * consumed. It is volatile so each update is a real memory access.
 */
volatile double g_sink = 0.0;
ExprContext* create_test_context() {
ExprContext* ctx = expr_context_new();
expr_context_add_function(ctx, "sin", 1, native_sin);
expr_context_add_function(ctx, "cos", 1, native_cos);
expr_context_add_function(ctx, "sqrt", 1, native_sqrt);
expr_context_add_function(ctx, "exp", 1, native_exp);
expr_context_add_function(ctx, "log", 1, native_log);
expr_context_add_function(ctx, "log10", 1, native_log10);
expr_context_add_function(ctx, "pow", 2, native_pow);
expr_context_add_function(ctx, "atan2", 2, native_atan2);
expr_context_add_function(ctx, "abs", 1, native_abs);
expr_context_add_function(ctx, "sign", 1, native_sign);
expr_context_add_function(ctx, "min", 2, native_min);
expr_context_add_function(ctx, "max", 2, native_max);
expr_context_add_function(ctx, "fmod", 2, native_fmod);
return ctx;
}
/*
 * Times one-off setup operations and prints the average cost of each:
 *   - creating a test context (with its native functions) and freeing it;
 *   - creating a batch with expr_batch_new(32768) and freeing it.
 *
 * now_max is the value obtained from nanotime_now_max(); it is forwarded to
 * nanotime_interval(). Intervals are divided by 1000 before printing, matching
 * the "µs" unit in the output labels.
 */
void benchmark_setup_overhead(uint64_t now_max) {
    /* Loop bound and averaging divisor share one constant so they cannot drift apart. */
    enum { CTX_ROUNDS = 100, BUILDER_ROUNDS = 10000 };

    printf("\n=== Setup Overhead ===\n");

    const uint64_t ctx_start = nanotime_now();
    for (int i = 0; i < CTX_ROUNDS; i++) {
        ExprContext* ctx = create_test_context();
        expr_context_free(ctx);
    }
    const uint64_t ctx_end = nanotime_now();
    const double ctx_us = nanotime_interval(ctx_start, ctx_end, now_max) / 1000.0 / CTX_ROUNDS;
    printf(" Context creation + 13 functions: %.3f µs\n", ctx_us);

    /*
     * There is no standalone arena operation in this benchmark to time.
     * Previously an empty loop was timed and its duration reported under this
     * label; report the row as not applicable instead of a meaningless number.
     */
    printf(" Arena creation (32KB): n/a (no standalone arena step to time)\n");

    const uint64_t builder_start = nanotime_now();
    for (int i = 0; i < BUILDER_ROUNDS; i++) {
        /* NOTE(review): 32768 is presumably the batch's arena size in bytes - confirm against exp_rs.h. */
        ExprBatch* builder = expr_batch_new(32768);
        expr_batch_free(builder);
    }
    const uint64_t builder_end = nanotime_now();
    const double builder_us = nanotime_interval(builder_start, builder_end, now_max) / 1000.0 / BUILDER_ROUNDS;
    printf(" Builder creation + reset: %.3f µs\n", builder_us);
}
/*
 * Times the full batch setup path: create a batch, add every parameter, add
 * every expression, free the batch. Prints the average per setup and that
 * average divided by the number of expressions.
 *
 * ctx     - unused here; kept so all benchmarks share the (ctx, now_max) shape
 *           that main() calls them with.
 * now_max - value from nanotime_now_max(), forwarded to nanotime_interval().
 */
void benchmark_parsing(ExprContext* ctx, uint64_t now_max) {
    (void)ctx;

    enum { SETUP_ROUNDS = 1000 };

    static const char* const expressions[] = {
        "p0 + p1",
        "p0 * p1 + p2",
        "sqrt(p0*p0 + p1*p1)",
        "sin(p0) * cos(p1)",
        "log10(abs(p5) + 1) * p6",
        "pow(p7, 2) + pow(p8, 2) + pow(p9, 2)",
        "(exp(p0) - exp(-p0)) / 2"
    };
    /* Name and initial value kept together so the two can never get out of step. */
    static const struct {
        const char* name;
        double value;
    } params[] = {
        {"p0", 1.5}, {"p1", 2.3}, {"p2", 3.7}, {"p3", 0.5}, {"p4", 1.2},
        {"p5", -0.8}, {"p6", 2.1}, {"p7", 0.9}, {"p8", 1.4}, {"p9", 0.7}
    };
    /* Counts come from the tables, so adding an entry updates loops, labels and divisor. */
    const size_t n_exprs = sizeof expressions / sizeof expressions[0];
    const size_t n_params = sizeof params / sizeof params[0];

    printf("\n=== Parsing Performance ===\n");

    const uint64_t parse_start = nanotime_now();
    for (int i = 0; i < SETUP_ROUNDS; i++) {
        ExprBatch* builder = expr_batch_new(32768);
        for (size_t p = 0; p < n_params; p++) {
            expr_batch_add_variable(builder, params[p].name, params[p].value);
        }
        for (size_t e = 0; e < n_exprs; e++) {
            expr_batch_add_expression(builder, expressions[e]);
        }
        expr_batch_free(builder);
    }
    const uint64_t parse_end = nanotime_now();

    const double parse_total_us = nanotime_interval(parse_start, parse_end, now_max) / 1000.0 / SETUP_ROUNDS;
    printf(" Full setup (%zu params + %zu exprs): %.3f µs\n", n_params, n_exprs, parse_total_us);
    /* Approximate: the total also contains batch creation, variable setup and teardown. */
    printf(" Per expression parsing: ~%.3f µs\n", parse_total_us / (double)n_exprs);
}
/*
 * Interval between two nanotime samples, divided by 1000 (the output labels
 * report the result as µs) and by the number of rounds.
 */
static double eval_bench_avg_us(uint64_t t0, uint64_t t1, uint64_t now_max, double rounds) {
    return nanotime_interval(t0, t1, now_max) / 1000.0 / rounds;
}

/*
 * Times evaluation of a pre-built batch in four ways and prints each result:
 *   1. evaluating all seven expressions and reading every result;
 *   2. updating all ten parameters;
 *   3. a combined update + evaluate + read cycle;
 *   4. each expression alone in its own batch.
 * Every result read is added to g_sink.
 *
 * ctx     - context passed to expr_batch_evaluate().
 * now_max - value from nanotime_now_max(), forwarded to nanotime_interval().
 */
void benchmark_evaluation(ExprContext* ctx, uint64_t now_max) {
    enum {
        N_PARAMS = 10,
        N_EXPRS = 7,
        BATCH_ROUNDS = 100000,
        UPDATE_ROUNDS = 100000,
        CYCLE_ROUNDS = 10000,
        SINGLE_ROUNDS = 10000
    };

    printf("\n=== Evaluation Performance ===\n");

    const char* exprs[] = {
        "p0 + p1",
        "p0 * p1 + p2",
        "sqrt(p0*p0 + p1*p1)",
        "sin(p0) * cos(p1)",
        "log10(abs(p5) + 1) * p6",
        "pow(p7, 2) + pow(p8, 2) + pow(p9, 2)",
        "(exp(p0) - exp(-p0)) / 2"
    };
    const char* names[] = {"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", "p8", "p9"};
    double values[] = {1.5, 2.3, 3.7, 0.5, 1.2, -0.8, 2.1, 0.9, 1.4, 0.7};

    /* Shared batch holding every parameter and every expression. */
    ExprBatch* batch = expr_batch_new(32768);
    for (int k = 0; k < N_PARAMS; k++) {
        expr_batch_add_variable(batch, names[k], values[k]);
    }
    for (int k = 0; k < N_EXPRS; k++) {
        expr_batch_add_expression(batch, exprs[k]);
    }

    /* 1. Evaluate the whole batch and read every result. */
    const uint64_t t_eval0 = nanotime_now();
    for (int round = 0; round < BATCH_ROUNDS; round++) {
        expr_batch_evaluate(batch, ctx);
        for (int k = 0; k < N_EXPRS; k++) {
            g_sink += expr_batch_get_result(batch, k);
        }
    }
    const uint64_t t_eval1 = nanotime_now();
    const double eval_us = eval_bench_avg_us(t_eval0, t_eval1, now_max, 100000.0);
    printf(" Batch eval (7 expressions): %.3f µs\n", eval_us);
    printf(" Per expression: %.3f µs\n", eval_us / 7.0);
    printf(" Evaluations per second: %.0f\n", 1e6 / eval_us);

    /* 2. Parameter writes only. */
    const uint64_t t_param0 = nanotime_now();
    for (int round = 0; round < UPDATE_ROUNDS; round++) {
        const double shift = round * 0.0001;
        for (int k = 0; k < N_PARAMS; k++) {
            expr_batch_set_variable(batch, k, values[k] + shift);
        }
    }
    const uint64_t t_param1 = nanotime_now();
    const double param_us = eval_bench_avg_us(t_param0, t_param1, now_max, 100000.0);
    printf(" Parameter update (10 params): %.3f µs\n", param_us);
    printf(" Per parameter: %.3f µs\n", param_us / 10.0);

    /* 3. Write parameters, evaluate, read results. */
    const uint64_t t_cycle0 = nanotime_now();
    for (int round = 0; round < CYCLE_ROUNDS; round++) {
        const double shift = round * 0.001;
        for (int k = 0; k < N_PARAMS; k++) {
            expr_batch_set_variable(batch, k, values[k] + shift);
        }
        expr_batch_evaluate(batch, ctx);
        for (int k = 0; k < N_EXPRS; k++) {
            g_sink += expr_batch_get_result(batch, k);
        }
    }
    const uint64_t t_cycle1 = nanotime_now();
    const double cycle_us = eval_bench_avg_us(t_cycle0, t_cycle1, now_max, 10000.0);
    printf(" Full update + eval cycle: %.3f µs\n", cycle_us);
    printf(" Update rate: %.0f Hz\n", 1e6 / cycle_us);

    /* 4. One batch per expression, so each can be timed on its own. */
    printf("\n Individual Expression Timing:\n");
    for (int which = 0; which < N_EXPRS; which++) {
        ExprBatch* solo = expr_batch_new(32768);
        for (int k = 0; k < N_PARAMS; k++) {
            expr_batch_add_variable(solo, names[k], values[k]);
        }
        expr_batch_add_expression(solo, exprs[which]);

        const uint64_t t_solo0 = nanotime_now();
        for (int round = 0; round < SINGLE_ROUNDS; round++) {
            expr_batch_evaluate(solo, ctx);
            g_sink += expr_batch_get_result(solo, 0);
        }
        const uint64_t t_solo1 = nanotime_now();

        const double solo_us = eval_bench_avg_us(t_solo0, t_solo1, now_max, 10000.0);
        printf(" \"%s\": %.3f µs\n", exprs[which], solo_us);
        expr_batch_free(solo);
    }

    expr_batch_free(batch);
}
/*
 * Runs the same single-expression update + evaluate + read sequence 100,000
 * times, grouped into outer/inner loops of 1, 10, 100 and 1000 items, and
 * prints per-item time, total time and throughput for each grouping.
 * A fresh batch is built for every grouping. Each result is added to g_sink.
 *
 * ctx     - context passed to expr_batch_evaluate().
 * now_max - value from nanotime_now_max(), forwarded to nanotime_interval().
 */
void benchmark_batch_scaling(ExprContext* ctx, uint64_t now_max) {
    enum { N_PARAMS = 4, N_SIZES = 4, TOTAL_OPS = 100000 };

    const char* formula = "sqrt(p0*p0 + p1*p1) + sin(p2) * p3";
    const char* names[] = {"p0", "p1", "p2", "p3"};
    double base[] = {3.0, 4.0, 1.57, 2.0};
    int sizes[] = {1, 10, 100, 1000};

    printf("\n=== Batch Size Scaling ===\n");
    printf(" Testing with expression: \"%s\"\n", formula);
    printf(" Total operations kept constant at 100,000\n\n");

    for (int s = 0; s < N_SIZES; s++) {
        const int group = sizes[s];
        const int outer = TOTAL_OPS / group;
        /* Items actually processed; used for both the per-item and per-second figures. */
        const int items = outer * group;

        ExprBatch* batch = expr_batch_new(32768);
        for (int k = 0; k < N_PARAMS; k++) {
            expr_batch_add_variable(batch, names[k], base[k]);
        }
        expr_batch_add_expression(batch, formula);

        const uint64_t t0 = nanotime_now();
        for (int i = 0; i < outer; i++) {
            for (int j = 0; j < group; j++) {
                /* Same offset for all four parameters of this item. */
                const double shift = (i * group + j) * 0.001;
                for (int k = 0; k < N_PARAMS; k++) {
                    expr_batch_set_variable(batch, k, base[k] + shift);
                }
                expr_batch_evaluate(batch, ctx);
                g_sink += expr_batch_get_result(batch, 0);
            }
        }
        const uint64_t t1 = nanotime_now();

        const double elapsed_us = nanotime_interval(t0, t1, now_max) / 1000.0;
        printf(" Batch size %4d: %.3f µs/item, %.2f ms total, %.0f items/sec\n",
               group,
               elapsed_us / items,
               elapsed_us / 1000.0,
               items / (elapsed_us / 1e6));

        expr_batch_free(batch);
    }
}
/*
 * Compares two ways of evaluating the same expression `iterations` times:
 *
 *   - "New arena each time": a batch is created, populated, evaluated and
 *     freed on every iteration.
 *   - "Arena reuse": one batch is created and populated once, then only
 *     evaluated on each iteration, and freed at the end.
 *
 * Previously both timed loops were identical (each created a new batch per
 * iteration), so the printed speedup compared a measurement with itself.
 *
 * NOTE(review): this assumes a batch owns the arena sized by the argument to
 * expr_batch_new(), so reusing the batch is what "arena reuse" means here -
 * confirm against exp_rs.h.
 *
 * ctx     - context passed to expr_batch_evaluate().
 * now_max - value from nanotime_now_max(), forwarded to nanotime_interval().
 */
void benchmark_arena_reuse(ExprContext* ctx, uint64_t now_max) {
    printf("\n=== Arena Reuse Performance ===\n");

    const char* expr = "p0 * sin(p1) + p2 * cos(p3)";
    const char* param_names[] = {"p0", "p1", "p2", "p3"};
    double param_values[] = {1.0, 0.5, 2.0, 1.0};
    const int n_params = (int)(sizeof param_names / sizeof param_names[0]);
    const int iterations = 10000;

    /* Baseline: full create / populate / evaluate / free on every iteration. */
    const uint64_t new_start = nanotime_now();
    for (int i = 0; i < iterations; i++) {
        ExprBatch* builder = expr_batch_new(8192);
        for (int p = 0; p < n_params; p++) {
            expr_batch_add_variable(builder, param_names[p], param_values[p]);
        }
        expr_batch_add_expression(builder, expr);
        expr_batch_evaluate(builder, ctx);
        expr_batch_free(builder);
    }
    const uint64_t new_end = nanotime_now();
    const double new_us = nanotime_interval(new_start, new_end, now_max) / 1000.0 / iterations;

    /*
     * Reuse: the one-time setup and teardown stay inside the timed region so
     * their cost is amortised over the iterations rather than hidden.
     */
    const uint64_t reuse_start = nanotime_now();
    ExprBatch* shared = expr_batch_new(8192);
    for (int p = 0; p < n_params; p++) {
        expr_batch_add_variable(shared, param_names[p], param_values[p]);
    }
    expr_batch_add_expression(shared, expr);
    for (int i = 0; i < iterations; i++) {
        expr_batch_evaluate(shared, ctx);
    }
    expr_batch_free(shared);
    const uint64_t reuse_end = nanotime_now();
    const double reuse_us = nanotime_interval(reuse_start, reuse_end, now_max) / 1000.0 / iterations;

    printf(" New arena each time: %.3f µs/iteration\n", new_us);
    printf(" Arena reuse: %.3f µs/iteration\n", reuse_us);
    printf(" Speedup: %.1fx\n", new_us / reuse_us);
}
/*
 * Benchmark entry point: prints basic system information, builds one shared
 * context, runs every benchmark in turn, prints a summary and the g_sink
 * total, then frees the context.
 *
 * Returns 0 on success, or EXIT_FAILURE if the context could not be created.
 */
int main(void) {
    init_memory_tracking();

    printf("=== Expression Evaluation Performance Benchmark ===\n");

    uint64_t now_max = nanotime_now_max();
    printf("\nSystem info:\n");
    printf(" Nanotime max: %llu\n", (unsigned long long)now_max);
    printf(" Real size: %zu bytes\n", sizeof(Real));

    ExprContext* ctx = create_test_context();
    if (ctx == NULL) {
        /* Without a context, every evaluation benchmark below would measure nothing useful. */
        fprintf(stderr, "Failed to create expression context\n");
        return EXIT_FAILURE;
    }

    benchmark_setup_overhead(now_max);
    benchmark_parsing(ctx, now_max);
    benchmark_evaluation(ctx, now_max);
    benchmark_batch_scaling(ctx, now_max);
    benchmark_arena_reuse(ctx, now_max);

    /*
     * The figures below are fixed reference text, not values derived from
     * this run; compare them against the measurements printed above.
     */
    printf("\n=== Performance Summary ===\n");
    printf(" Typical parse + eval cycle for 7 expressions:\n");
    printf(" Setup (arena + builder + parse): ~20-30 µs\n");
    printf(" Runtime evaluation: ~2-5 µs\n");
    printf(" Parameter updates: ~1-2 µs\n");
    printf(" Arena reuse provides 2-3x speedup for repeated operations\n");
    printf(" Batch processing scales linearly with batch size\n");

    expr_context_free(ctx);

    /* Printing the accumulated total gives the collected results a visible use. */
    printf("\n(Optimization prevention: %.6f)\n", g_sink);
    return 0;
}