use criterion::{Criterion, Throughput, criterion_group, criterion_main};
use recoco::base::value::{BasicValue, Value};
use recoco::ops::interface::SimpleFunctionExecutor;
use std::hint::black_box;
use thread_ast_engine::tree_sitter::LanguageExt;
use thread_flow::functions::parse::ThreadParseExecutor;
/// Hand-written Rust snippet used as the smallest parser input in this file.
///
/// It is fed to `parse_direct` by the single-file, multi-file, language
/// comparison and throughput benchmarks. The text is a fixed constant, so the
/// byte counts reported through `Throughput::Bytes` are stable between runs.
const SMALL_RUST: &str = r#"
// Small Rust module for benchmarking
use std::collections::HashMap;
use std::sync::Arc;
#[derive(Debug, Clone)]
pub struct Config {
pub name: String,
pub value: i32,
}
impl Config {
pub fn new(name: String, value: i32) -> Self {
Self { name, value }
}
pub fn update(&mut self, value: i32) {
self.value = value;
}
}
pub fn process_data(input: &[i32]) -> Vec<i32> {
input.iter().map(|x| x * 2).collect()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_config() {
let cfg = Config::new("test".to_string(), 42);
assert_eq!(cfg.value, 42);
}
#[test]
fn test_process() {
let result = process_data(&[1, 2, 3]);
assert_eq!(result, vec![2, 4, 6]);
}
}
"#;
/// Builds the medium-sized Rust fixture as an owned `String`.
///
/// The result is a fixed user-management module (structs, an `impl` block and
/// a test module) followed by five generated `helper_function_N` definitions,
/// numbered 1 through 5. The output is the same on every call.
fn generate_medium_rust() -> String {
    // Fixed prelude: everything up to the generated helpers.
    let mut fixture = String::from(
        r#"
// Medium Rust module for benchmarking
use std::collections::{HashMap, HashSet};
use std::sync::{Arc, Mutex};
use std::error::Error;
#[derive(Debug, Clone)]
pub struct UserProfile {
pub id: u64,
pub name: String,
pub email: String,
pub roles: Vec<String>,
}
#[derive(Debug)]
pub struct UserManager {
users: Arc<Mutex<HashMap<u64, UserProfile>>>,
email_index: Arc<Mutex<HashMap<String, u64>>>,
}
impl UserManager {
pub fn new() -> Self {
Self {
users: Arc::new(Mutex::new(HashMap::new())),
email_index: Arc::new(Mutex::new(HashMap::new())),
}
}
pub fn add_user(&self, user: UserProfile) -> Result<(), Box<dyn Error>> {
let mut users = self.users.lock().unwrap();
let mut emails = self.email_index.lock().unwrap();
if emails.contains_key(&user.email) {
return Err("Email already exists".into());
}
emails.insert(user.email.clone(), user.id);
users.insert(user.id, user);
Ok(())
}
pub fn get_user(&self, id: u64) -> Option<UserProfile> {
self.users.lock().unwrap().get(&id).cloned()
}
pub fn find_by_email(&self, email: &str) -> Option<UserProfile> {
let emails = self.email_index.lock().unwrap();
let id = emails.get(email)?;
self.users.lock().unwrap().get(id).cloned()
}
pub fn update_user(&self, id: u64, name: String) -> Result<(), Box<dyn Error>> {
let mut users = self.users.lock().unwrap();
let user = users.get_mut(&id).ok_or("User not found")?;
user.name = name;
Ok(())
}
pub fn delete_user(&self, id: u64) -> Result<(), Box<dyn Error>> {
let mut users = self.users.lock().unwrap();
let user = users.remove(&id).ok_or("User not found")?;
let mut emails = self.email_index.lock().unwrap();
emails.remove(&user.email);
Ok(())
}
pub fn count(&self) -> usize {
self.users.lock().unwrap().len()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_add_user() {
let manager = UserManager::new();
let user = UserProfile {
id: 1,
name: "Test User".to_string(),
email: "test@example.com".to_string(),
roles: vec!["user".to_string()],
};
assert!(manager.add_user(user).is_ok());
assert_eq!(manager.count(), 1);
}
#[test]
fn test_duplicate_email() {
let manager = UserManager::new();
let user1 = UserProfile {
id: 1,
name: "User 1".to_string(),
email: "same@example.com".to_string(),
roles: vec![],
};
let user2 = UserProfile {
id: 2,
name: "User 2".to_string(),
email: "same@example.com".to_string(),
roles: vec![],
};
assert!(manager.add_user(user1).is_ok());
assert!(manager.add_user(user2).is_err());
}
#[test]
fn test_find_by_email() {
let manager = UserManager::new();
let user = UserProfile {
id: 1,
name: "Test".to_string(),
email: "find@example.com".to_string(),
roles: vec![],
};
manager.add_user(user).unwrap();
let found = manager.find_by_email("find@example.com");
assert!(found.is_some());
assert_eq!(found.unwrap().id, 1);
}
}
"#,
    );

    // Append the numbered helpers; each one embeds its index both in the
    // function name and as the `wrapping_add` operand.
    fixture.extend((1..=5).map(|n| {
        format!(
            r#"
pub fn helper_function_{n}(data: &[u8]) -> Vec<u8> {{
data.iter().map(|b| b.wrapping_add({n})).collect()
}}
"#
        )
    }));

    fixture
}
/// Builds the large Rust fixture as an owned `String`.
///
/// Starts from the output of `generate_medium_rust`, appends a fixed
/// `Operation` / `Calculator` section, and then appends ten generated groups
/// numbered 1 through 10, each holding a `ProcessorN` trait, an `ImplN`
/// struct, the trait impl and a constructor.
fn generate_large_rust() -> String {
    let mut fixture = generate_medium_rust();

    // Fixed middle section.
    fixture.push_str(
        r#"
#[derive(Debug, Clone)]
pub enum Operation {
Add(i64, i64),
Subtract(i64, i64),
Multiply(i64, i64),
Divide(i64, i64),
Power(i64, u32),
}
impl Operation {
pub fn execute(&self) -> Result<i64, String> {
match self {
Operation::Add(a, b) => Ok(a + b),
Operation::Subtract(a, b) => Ok(a - b),
Operation::Multiply(a, b) => Ok(a * b),
Operation::Divide(a, b) => {
if *b == 0 {
Err("Division by zero".to_string())
} else {
Ok(a / b)
}
}
Operation::Power(base, exp) => Ok(base.pow(*exp)),
}
}
}
pub struct Calculator {
history: Vec<Operation>,
}
impl Calculator {
pub fn new() -> Self {
Self { history: Vec::new() }
}
pub fn execute(&mut self, op: Operation) -> Result<i64, String> {
let result = op.execute()?;
self.history.push(op);
Ok(result)
}
pub fn clear_history(&mut self) {
self.history.clear();
}
pub fn history_len(&self) -> usize {
self.history.len()
}
}
"#,
    );

    // Generated tail: the index `k` is spliced into every type name.
    fixture.extend((1..=10).map(|k| {
        format!(
            r#"
pub trait Processor{k} {{
fn process(&self, input: Vec<u8>) -> Vec<u8>;
}}
pub struct Impl{k} {{
factor: u8,
}}
impl Processor{k} for Impl{k} {{
fn process(&self, input: Vec<u8>) -> Vec<u8> {{
input.iter().map(|b| b.wrapping_mul(self.factor)).collect()
}}
}}
impl Impl{k} {{
pub fn new(factor: u8) -> Self {{
Self {{ factor }}
}}
}}
"#
        )
    }));

    fixture
}
/// Parses `code` with the language selected by `language_ext` and returns the
/// length (`len()`) of the root node's text.
///
/// The extension is first resolved with `thread_language::from_extension_str`.
/// If that yields nothing, a placeholder path `dummy.<ext>` is built and
/// resolved through `thread_language::from_extension` instead.
///
/// The returned length is what the benchmarks pass to `black_box`.
///
/// # Panics
///
/// Panics, naming the offending extension, if neither lookup recognises
/// `language_ext`.
fn parse_direct(code: &str, language_ext: &str) -> usize {
    let lang = thread_language::from_extension_str(language_ext)
        .or_else(|| {
            // Fallback: resolve through a path-based lookup using a dummy file name.
            let p = std::path::PathBuf::from(format!("dummy.{language_ext}"));
            thread_language::from_extension(&p)
        })
        // A bare `unwrap()` would hide which extension was rejected.
        .unwrap_or_else(|| panic!("no language found for extension `{language_ext}`"));
    let root = lang.ast_grep(code);
    root.root().text().len()
}
/// Evaluates `ThreadParseExecutor` on one source file and returns its `Value`.
///
/// The three string arguments are wrapped, in the order `code`, `language`,
/// `path`, as `Value::Basic(BasicValue::Str(..))` and passed to
/// `SimpleFunctionExecutor::evaluate`. A new Tokio runtime is created on every
/// call to drive the future to completion.
///
/// # Panics
///
/// Panics if the runtime cannot be created or if `evaluate` returns an error.
// No benchmark visible in this file calls this helper, hence the lint allowance.
#[allow(dead_code)]
fn parse_with_recoco(code: &str, language: &str, path: &str) -> Value {
    let executor = ThreadParseExecutor;
    let args: Vec<Value> = [code, language, path]
        .into_iter()
        .map(|text| Value::Basic(BasicValue::Str(text.to_string().into())))
        .collect();

    let runtime = tokio::runtime::Runtime::new().unwrap();
    runtime.block_on(executor.evaluate(args)).unwrap()
}
/// Benchmarks `parse_direct` on the `SMALL_RUST` constant.
///
/// Registered in the `direct_parse` group as `rust_small_50_lines`, with
/// throughput reported in bytes of input.
fn benchmark_direct_parse_small(c: &mut Criterion) {
    let input_bytes = SMALL_RUST.len() as u64;

    let mut direct = c.benchmark_group("direct_parse");
    direct.throughput(Throughput::Bytes(input_bytes));
    direct.bench_function("rust_small_50_lines", |bencher| {
        bencher.iter(|| {
            let parsed_len = parse_direct(black_box(SMALL_RUST), black_box("rs"));
            black_box(parsed_len)
        });
    });
    direct.finish();
}
/// Benchmarks `parse_direct` on the output of `generate_medium_rust`.
///
/// The fixture is generated once, outside the measured closure. Registered in
/// the `direct_parse` group as `rust_medium_200_lines`, with throughput
/// reported in bytes of input.
fn benchmark_direct_parse_medium(c: &mut Criterion) {
    let fixture = generate_medium_rust();
    let input_bytes = fixture.len() as u64;

    let mut direct = c.benchmark_group("direct_parse");
    direct.throughput(Throughput::Bytes(input_bytes));
    direct.bench_function("rust_medium_200_lines", |bencher| {
        bencher.iter(|| {
            let parsed_len = parse_direct(black_box(fixture.as_str()), black_box("rs"));
            black_box(parsed_len)
        });
    });
    direct.finish();
}
/// Benchmarks `parse_direct` on the output of `generate_large_rust`.
///
/// The fixture is generated once, outside the measured closure. Registered in
/// the `direct_parse` group as `rust_large_500_lines`, with throughput
/// reported in bytes of input.
fn benchmark_direct_parse_large(c: &mut Criterion) {
    let fixture = generate_large_rust();
    let input_bytes = fixture.len() as u64;

    let mut direct = c.benchmark_group("direct_parse");
    direct.throughput(Throughput::Bytes(input_bytes));
    direct.bench_function("rust_large_500_lines", |bencher| {
        bencher.iter(|| {
            let parsed_len = parse_direct(black_box(fixture.as_str()), black_box("rs"));
            black_box(parsed_len)
        });
    });
    direct.finish();
}
/// Benchmarks parsing ten copies of `SMALL_RUST` one after another.
///
/// Registered in the `multi_file_batch` group as `sequential_10_small_files`.
/// Throughput is reported in bytes, using the combined length of all ten
/// inputs, so one measured iteration covers the whole batch.
fn benchmark_multi_file_sequential(c: &mut Criterion) {
    // Ten identical inputs, expressed with the array-repeat form.
    let files = [SMALL_RUST; 10];
    let total_bytes = files.iter().fold(0usize, |sum, source| sum + source.len());

    let mut batch = c.benchmark_group("multi_file_batch");
    batch.throughput(Throughput::Bytes(total_bytes as u64));
    batch.bench_function("sequential_10_small_files", |bencher| {
        bencher.iter(|| {
            files.iter().for_each(|&source| {
                black_box(parse_direct(black_box(source), black_box("rs")));
            });
        });
    });
    batch.finish();
}
/// Benchmarks parsing a batch of ten Rust inputs of mixed sizes in sequence.
///
/// The batch interleaves `SMALL_RUST` with the generated medium and large
/// fixtures (five small, three medium, two large). Registered in the
/// `multi_file_batch` group as `sequential_10_mixed_files`, with throughput
/// reported in bytes over the combined length of the batch.
fn benchmark_multi_file_mixed_sizes(c: &mut Criterion) {
    // Generate each fixture once and share it between batch slots.
    let medium_code = generate_medium_rust();
    let large_code = generate_large_rust();
    let (small, medium, large) = (SMALL_RUST, medium_code.as_str(), large_code.as_str());

    let files: [&str; 10] = [
        small, medium, small, large, small, medium, small, large, small, medium,
    ];
    let total_bytes = files.iter().fold(0usize, |sum, source| sum + source.len());

    let mut batch = c.benchmark_group("multi_file_batch");
    batch.throughput(Throughput::Bytes(total_bytes as u64));
    batch.bench_function("sequential_10_mixed_files", |bencher| {
        bencher.iter(|| {
            files.iter().for_each(|&source| {
                black_box(parse_direct(black_box(source), black_box("rs")));
            });
        });
    });
    batch.finish();
}
/// Hand-written Python snippet used as parser input.
///
/// Parsed with the `"py"` extension by `benchmark_language_comparison`.
const SMALL_PYTHON: &str = r#"
# Small Python module for benchmarking
import json
from typing import List, Dict
class Config:
def __init__(self, name: str, value: int):
self.name = name
self.value = value
def update(self, value: int):
self.value = value
def process_data(data: List[int]) -> List[int]:
return [x * 2 for x in data]
def main():
cfg = Config("test", 42)
result = process_data([1, 2, 3])
print(result)
if __name__ == "__main__":
main()
"#;
/// Hand-written TypeScript snippet used as parser input.
///
/// Parsed with the `"ts"` extension by `benchmark_language_comparison`.
const SMALL_TYPESCRIPT: &str = r#"
// Small TypeScript module for benchmarking
interface Config {
name: string;
value: number;
}
class ConfigManager {
private config: Config;
constructor(name: string, value: number) {
this.config = { name, value };
}
update(value: number): void {
this.config.value = value;
}
getValue(): number {
return this.config.value;
}
}
function processData(data: number[]): number[] {
return data.map(x => x * 2);
}
export { Config, ConfigManager, processData };
"#;
/// Benchmarks `parse_direct` on one small snippet per language.
///
/// Registers `rust_small`, `python_small` and `typescript_small`, in that
/// order, in the `language_comparison` group. No throughput is configured for
/// this group.
fn benchmark_language_comparison(c: &mut Criterion) {
    // (benchmark id, source text, file extension used to pick the language)
    let cases = [
        ("rust_small", SMALL_RUST, "rs"),
        ("python_small", SMALL_PYTHON, "py"),
        ("typescript_small", SMALL_TYPESCRIPT, "ts"),
    ];

    let mut comparison = c.benchmark_group("language_comparison");
    for (label, source, ext) in cases {
        comparison.bench_function(label, |bencher| {
            bencher.iter(|| black_box(parse_direct(black_box(source), black_box(ext))))
        });
    }
    comparison.finish();
}
/// Benchmarks single-file parses with throughput counted in files.
///
/// Each measured iteration parses one file and throughput is set to
/// `Throughput::Elements(1)`. Registers `files_per_second_small`
/// (`SMALL_RUST`) and `files_per_second_medium` (the generated medium fixture)
/// in the `throughput` group.
fn benchmark_throughput(c: &mut Criterion) {
    // Shared measured body: parse one Rust source and keep the result observable.
    let parse_rust = |source: &str| black_box(parse_direct(black_box(source), black_box("rs")));

    let mut files_per_second = c.benchmark_group("throughput");

    files_per_second.throughput(Throughput::Elements(1));
    files_per_second.bench_function("files_per_second_small", |bencher| {
        bencher.iter(|| parse_rust(SMALL_RUST))
    });

    let medium_code = generate_medium_rust();
    // Same setting as above, stated again before the second benchmark.
    files_per_second.throughput(Throughput::Elements(1));
    files_per_second.bench_function("files_per_second_medium", |bencher| {
        bencher.iter(|| parse_rust(medium_code.as_str()))
    });

    files_per_second.finish();
}
// Collect every benchmark function in this file into the `benches` group, in
// the order listed, then pass that group to Criterion's `criterion_main!`
// entry-point macro.
criterion_group!(
benches,
benchmark_direct_parse_small,
benchmark_direct_parse_medium,
benchmark_direct_parse_large,
benchmark_multi_file_sequential,
benchmark_multi_file_mixed_sizes,
benchmark_language_comparison,
benchmark_throughput,
);
criterion_main!(benches);