1#![allow(dead_code)]
7
8use crate::model::pattern::TriplePattern;
9use crate::model::{Object, Predicate, Subject, Term, Triple, Variable};
10use crate::query::algebra::TermPattern;
11use crate::query::plan::ExecutionPlan;
12use crate::OxirsError;
13use std::collections::HashMap;
14use std::sync::{Arc, RwLock};
15use std::time::{Duration, Instant};
16
/// Query executor that interprets execution plans and switches to a cached,
/// compiled closure once a plan has been executed often enough.
pub struct JitCompiler {
    /// Compiled query functions keyed by plan hash, shared behind a lock.
    compiled_cache: Arc<RwLock<CompiledQueryCache>>,
    /// Per-plan execution counters and timings used to detect hot plans.
    execution_stats: Arc<RwLock<ExecutionStatistics>>,
    /// Configuration supplied at construction time.
    config: JitConfig,
}
26
/// Tuning knobs for [`JitCompiler`].
#[derive(Debug, Clone)]
pub struct JitConfig {
    /// Number of recorded interpreted executions of a plan at which the plan
    /// is considered hot and gets compiled.
    pub compilation_threshold: usize,
    /// Intended upper bound for the compiled-code cache.
    // NOTE(review): not read anywhere in the visible code; the cache applies
    // its own fixed 100 MiB limit. Confirm whether this should be wired in.
    pub max_cache_size: usize,
    /// Requests aggressive optimisation.
    // NOTE(review): not read anywhere in the visible code.
    pub aggressive_opts: bool,
    /// CPU features the generated code may rely on.
    // NOTE(review): not read anywhere in the visible code.
    pub target_features: TargetFeatures,
}
39
/// CPU feature flags describing the compilation target.
#[derive(Debug, Clone)]
pub struct TargetFeatures {
    /// AVX2 is available.
    pub avx2: bool,
    /// AVX-512 is available.
    pub avx512: bool,
    /// BMI2 is available.
    pub bmi2: bool,
    /// Vectorisation is requested.
    pub vectorize: bool,
}
52
/// Size-bounded store of compiled queries.
struct CompiledQueryCache {
    /// Compiled queries keyed by plan hash.
    queries: HashMap<QueryHash, CompiledQuery>,
    /// Sum of `code_size` over all cached entries.
    total_size: usize,
    /// Plan hashes in insertion order; the front is evicted first.
    access_order: Vec<QueryHash>,
}
62
/// A compiled query together with its bookkeeping metadata.
struct CompiledQuery {
    /// Callable that evaluates the query.
    function: QueryFunction,
    /// Size charged against the cache budget for this entry.
    code_size: usize,
    /// Time spent producing `function`.
    compile_time: Duration,
    /// Timestamp recorded when the entry was created.
    // NOTE(review): never refreshed on lookup in the visible code.
    last_accessed: Instant,
    /// Number of executions of the compiled form.
    // NOTE(review): initialised to 0 and never incremented in the visible code.
    execution_count: usize,
}
76
/// 64-bit hash identifying an execution plan.
type QueryHash = u64;

/// Shareable, thread-safe callable that evaluates a compiled query against a
/// [`QueryContext`].
type QueryFunction = Arc<dyn Fn(&QueryContext) -> Result<QueryOutput, OxirsError> + Send + Sync>;
82
/// Everything a query needs at execution time.
pub struct QueryContext {
    /// Shared graph data the query runs against.
    pub data: Arc<GraphData>,
    /// Variable bindings that are already fixed before matching starts; every
    /// produced solution is an extension of this map.
    pub bindings: HashMap<Variable, Term>,
    /// Resource limits for the execution.
    pub limits: ExecutionLimits,
}
92
/// Triples plus lookup indexes over them.
pub struct GraphData {
    /// All triples; index entries refer to positions in this vector.
    pub triples: Vec<Triple>,
    /// Lookup tables into `triples`.
    pub indexes: QueryIndexes,
}
100
/// Lookup tables from a triple component to positions in
/// [`GraphData::triples`].
pub struct QueryIndexes {
    /// Positions grouped by subject.
    // NOTE(review): not read in the visible code; presumably indexes into
    // `GraphData::triples` like `by_predicate` — confirm.
    pub by_subject: HashMap<Subject, Vec<usize>>,
    /// Positions in `GraphData::triples` of the triples with a given predicate.
    pub by_predicate: HashMap<Predicate, Vec<usize>>,
    /// Positions grouped by object.
    // NOTE(review): not read in the visible code; presumably indexes into
    // `GraphData::triples` like `by_predicate` — confirm.
    pub by_object: HashMap<Object, Vec<usize>>,
}
110
/// Resource limits applied to a single query execution.
#[derive(Debug, Clone)]
pub struct ExecutionLimits {
    /// Maximum number of solutions a triple scan collects before stopping.
    pub max_results: usize,
    /// Time budget for the query.
    // NOTE(review): not enforced anywhere in the visible code.
    pub timeout: Duration,
    /// Memory budget for the query.
    // NOTE(review): not enforced anywhere in the visible code; unit not shown.
    pub memory_limit: usize,
}
121
/// Result of executing a query.
pub struct QueryOutput {
    /// One variable-to-term map per solution.
    pub bindings: Vec<HashMap<Variable, Term>>,
    /// Measurements collected while producing `bindings`.
    pub stats: QueryStats,
}
129
/// Measurements describing one query execution.
#[derive(Debug, Clone)]
pub struct QueryStats {
    /// Number of triples inspected.
    pub triples_scanned: usize,
    /// Number of solutions produced.
    pub results_count: usize,
    /// Time attributed to the execution.
    pub execution_time: Duration,
    /// Estimated memory used for the results, as computed by the executor.
    pub memory_used: usize,
}
142
/// Execution history used to decide which plans are worth compiling.
struct ExecutionStatistics {
    /// Number of recorded executions per plan hash.
    query_counts: HashMap<QueryHash, usize>,
    /// Recorded execution times per plan hash, one entry per execution.
    query_times: HashMap<QueryHash, Vec<Duration>>,
    /// Execution count at which a plan is considered hot.
    hot_threshold: usize,
}
152
153impl JitCompiler {
154 pub fn new(config: JitConfig) -> Self {
156 Self {
157 compiled_cache: Arc::new(RwLock::new(CompiledQueryCache::new())),
158 execution_stats: Arc::new(RwLock::new(ExecutionStatistics::new(
159 config.compilation_threshold,
160 ))),
161 config,
162 }
163 }
164
165 pub fn execute(
167 &self,
168 plan: &ExecutionPlan,
169 context: QueryContext,
170 ) -> Result<QueryOutput, OxirsError> {
171 let hash = self.hash_plan(plan);
172
173 if let Some(compiled) = self.get_compiled(hash) {
175 return (compiled)(&context);
176 }
177
178 let start = Instant::now();
180 let result = self.execute_interpreted(plan, &context)?;
181 let execution_time = start.elapsed();
182
183 self.update_stats(hash, execution_time);
185
186 if self.should_compile(hash) {
188 self.compile_plan(plan, hash)?;
189 }
190
191 Ok(result)
192 }
193
194 fn hash_plan(&self, plan: &ExecutionPlan) -> QueryHash {
196 use std::collections::hash_map::DefaultHasher;
197 use std::hash::{Hash, Hasher};
198
199 let mut hasher = DefaultHasher::new();
200 format!("{plan:?}").hash(&mut hasher);
201 hasher.finish()
202 }
203
204 fn get_compiled(&self, hash: QueryHash) -> Option<QueryFunction> {
206 let cache = self.compiled_cache.read().ok()?;
207 cache.queries.get(&hash).map(|q| {
208 q.function.clone()
210 })
211 }
212
213 fn execute_interpreted(
215 &self,
216 plan: &ExecutionPlan,
217 context: &QueryContext,
218 ) -> Result<QueryOutput, OxirsError> {
219 match plan {
220 ExecutionPlan::TripleScan { pattern } => self.execute_triple_scan(pattern, context),
221 ExecutionPlan::HashJoin {
222 left,
223 right,
224 join_vars,
225 } => self.execute_hash_join(left, right, join_vars, context),
226 _ => Err(OxirsError::Query("Plan type not supported".to_string())),
227 }
228 }
229
230 fn execute_triple_scan(
232 &self,
233 pattern: &TriplePattern,
234 context: &QueryContext,
235 ) -> Result<QueryOutput, OxirsError> {
236 let mut results = Vec::new();
237 let mut stats = QueryStats {
238 triples_scanned: 0,
239 results_count: 0,
240 execution_time: Duration::ZERO,
241 memory_used: 0,
242 };
243
244 let start = Instant::now();
245
246 for triple in context.data.triples.iter() {
248 stats.triples_scanned += 1;
249
250 if let Some(bindings) = self.match_triple(triple, pattern, &context.bindings) {
251 results.push(bindings);
252 stats.results_count += 1;
253
254 if results.len() >= context.limits.max_results {
255 break;
256 }
257 }
258 }
259
260 stats.execution_time = start.elapsed();
261 stats.memory_used = results.len() * std::mem::size_of::<HashMap<Variable, Term>>();
262
263 Ok(QueryOutput {
264 bindings: results,
265 stats,
266 })
267 }
268
269 fn match_triple(
271 &self,
272 triple: &Triple,
273 pattern: &crate::model::pattern::TriplePattern,
274 existing: &HashMap<Variable, Term>,
275 ) -> Option<HashMap<Variable, Term>> {
276 let mut bindings = existing.clone();
277
278 if let Some(ref subject_pattern) = pattern.subject {
280 if !self.match_subject_pattern(triple.subject(), subject_pattern, &mut bindings) {
281 return None;
282 }
283 }
284
285 if let Some(ref predicate_pattern) = pattern.predicate {
287 if !self.match_predicate_pattern(triple.predicate(), predicate_pattern, &mut bindings) {
288 return None;
289 }
290 }
291
292 if let Some(ref object_pattern) = pattern.object {
294 if !self.match_object_pattern(triple.object(), object_pattern, &mut bindings) {
295 return None;
296 }
297 }
298
299 Some(bindings)
300 }
301
302 fn match_term(
304 &self,
305 term: &Term,
306 pattern: &TermPattern,
307 bindings: &mut HashMap<Variable, Term>,
308 ) -> bool {
309 match pattern {
310 TermPattern::Variable(var) => {
311 if let Some(bound) = bindings.get(var) {
312 bound == term
313 } else {
314 bindings.insert(var.clone(), term.clone());
315 true
316 }
317 }
318 TermPattern::NamedNode(n) => {
319 matches!(term, Term::NamedNode(nn) if nn == n)
320 }
321 TermPattern::Literal(l) => {
322 matches!(term, Term::Literal(lit) if lit == l)
323 }
324 TermPattern::BlankNode(b) => {
325 matches!(term, Term::BlankNode(bn) if bn == b)
326 }
327 }
328 }
329
330 fn match_subject_pattern(
332 &self,
333 subject: &Subject,
334 pattern: &crate::model::pattern::SubjectPattern,
335 bindings: &mut HashMap<Variable, Term>,
336 ) -> bool {
337 use crate::model::pattern::SubjectPattern;
338 match pattern {
339 SubjectPattern::Variable(var) => {
340 let term = Term::from_subject(subject);
341 if let Some(bound_value) = bindings.get(var) {
342 bound_value == &term
343 } else {
344 bindings.insert(var.clone(), term);
345 true
346 }
347 }
348 SubjectPattern::NamedNode(n) => matches!(subject, Subject::NamedNode(nn) if nn == n),
349 SubjectPattern::BlankNode(b) => matches!(subject, Subject::BlankNode(bn) if bn == b),
350 }
351 }
352
353 fn match_predicate_pattern(
355 &self,
356 predicate: &Predicate,
357 pattern: &crate::model::pattern::PredicatePattern,
358 bindings: &mut HashMap<Variable, Term>,
359 ) -> bool {
360 use crate::model::pattern::PredicatePattern;
361 match pattern {
362 PredicatePattern::Variable(var) => {
363 let term = Term::from_predicate(predicate);
364 if let Some(bound_value) = bindings.get(var) {
365 bound_value == &term
366 } else {
367 bindings.insert(var.clone(), term);
368 true
369 }
370 }
371 PredicatePattern::NamedNode(n) => {
372 matches!(predicate, Predicate::NamedNode(nn) if nn == n)
373 }
374 }
375 }
376
377 fn match_object_pattern(
379 &self,
380 object: &Object,
381 pattern: &crate::model::pattern::ObjectPattern,
382 bindings: &mut HashMap<Variable, Term>,
383 ) -> bool {
384 use crate::model::pattern::ObjectPattern;
385 match pattern {
386 ObjectPattern::Variable(var) => {
387 let term = Term::from_object(object);
388 if let Some(bound_value) = bindings.get(var) {
389 bound_value == &term
390 } else {
391 bindings.insert(var.clone(), term);
392 true
393 }
394 }
395 ObjectPattern::NamedNode(n) => matches!(object, Object::NamedNode(nn) if nn == n),
396 ObjectPattern::BlankNode(b) => matches!(object, Object::BlankNode(bn) if bn == b),
397 ObjectPattern::Literal(l) => matches!(object, Object::Literal(lit) if lit == l),
398 }
399 }
400
401 fn execute_hash_join(
403 &self,
404 left: &ExecutionPlan,
405 right: &ExecutionPlan,
406 join_vars: &[Variable],
407 context: &QueryContext,
408 ) -> Result<QueryOutput, OxirsError> {
409 let left_output = self.execute_interpreted(left, context)?;
411
412 let mut hash_table: HashMap<Vec<Term>, Vec<HashMap<Variable, Term>>> = HashMap::new();
414
415 for binding in left_output.bindings {
416 let key: Vec<Term> = join_vars
417 .iter()
418 .filter_map(|var| binding.get(var).cloned())
419 .collect();
420 hash_table.entry(key).or_default().push(binding);
421 }
422
423 let right_output = self.execute_interpreted(right, context)?;
425 let mut results = Vec::new();
426
427 for right_binding in right_output.bindings {
428 let key: Vec<Term> = join_vars
429 .iter()
430 .filter_map(|var| right_binding.get(var).cloned())
431 .collect();
432
433 if let Some(left_bindings) = hash_table.get(&key) {
434 for left_binding in left_bindings {
435 let mut merged = left_binding.clone();
436 merged.extend(right_binding.clone());
437 results.push(merged);
438 }
439 }
440 }
441
442 let results_count = results.len();
443 Ok(QueryOutput {
444 bindings: results,
445 stats: QueryStats {
446 triples_scanned: left_output.stats.triples_scanned
447 + right_output.stats.triples_scanned,
448 results_count,
449 execution_time: left_output.stats.execution_time
450 + right_output.stats.execution_time,
451 memory_used: left_output.stats.memory_used + right_output.stats.memory_used,
452 },
453 })
454 }
455
456 fn update_stats(&self, hash: QueryHash, execution_time: Duration) {
458 if let Ok(mut stats) = self.execution_stats.write() {
459 *stats.query_counts.entry(hash).or_insert(0) += 1;
460 stats
461 .query_times
462 .entry(hash)
463 .or_default()
464 .push(execution_time);
465 }
466 }
467
468 fn should_compile(&self, hash: QueryHash) -> bool {
470 if let Ok(stats) = self.execution_stats.read() {
471 if let Some(&count) = stats.query_counts.get(&hash) {
472 return count >= stats.hot_threshold;
473 }
474 }
475 false
476 }
477
478 fn compile_plan(&self, plan: &ExecutionPlan, hash: QueryHash) -> Result<(), OxirsError> {
480 let start = Instant::now();
481
482 let compiled = match plan {
484 ExecutionPlan::TripleScan { pattern } => self.compile_triple_scan(pattern)?,
485 ExecutionPlan::HashJoin {
486 left,
487 right,
488 join_vars,
489 } => self.compile_hash_join(left, right, join_vars)?,
490 _ => return Err(OxirsError::Query("Cannot compile plan type".to_string())),
491 };
492
493 let compile_time = start.elapsed();
494
495 if let Ok(mut cache) = self.compiled_cache.write() {
497 cache.add(
498 hash,
499 CompiledQuery {
500 function: compiled,
501 code_size: 1024, compile_time,
503 last_accessed: Instant::now(),
504 execution_count: 0,
505 },
506 );
507 }
508
509 Ok(())
510 }
511
512 fn compile_triple_scan(
514 &self,
515 pattern: &crate::model::pattern::TriplePattern,
516 ) -> Result<QueryFunction, OxirsError> {
517 let pattern = pattern.clone();
519
520 Ok(Arc::new(move |context: &QueryContext| {
521 let mut results = Vec::new();
522
523 if let Some(crate::model::pattern::PredicatePattern::NamedNode(pred)) =
525 &pattern.predicate
526 {
527 if let Some(indices) = context.data.indexes.by_predicate.get(&pred.clone().into()) {
529 for &idx in indices {
530 let triple = &context.data.triples[idx];
531 if let Some(bindings) =
533 match_triple_fast(triple, &pattern, &context.bindings)
534 {
535 results.push(bindings);
536 }
537 }
538 }
539 } else {
540 for triple in &context.data.triples {
542 if let Some(bindings) = match_triple_fast(triple, &pattern, &context.bindings) {
543 results.push(bindings);
544 }
545 }
546 }
547
548 let results_count = results.len();
549 Ok(QueryOutput {
550 bindings: results,
551 stats: QueryStats {
552 triples_scanned: context.data.triples.len(),
553 results_count,
554 execution_time: Duration::ZERO,
555 memory_used: 0,
556 },
557 })
558 }))
559 }
560
561 fn compile_hash_join(
563 &self,
564 _left: &ExecutionPlan,
565 _right: &ExecutionPlan,
566 _join_vars: &[Variable],
567 ) -> Result<QueryFunction, OxirsError> {
568 Ok(Arc::new(move |_context: &QueryContext| {
570 Ok(QueryOutput {
571 bindings: Vec::new(),
572 stats: QueryStats {
573 triples_scanned: 0,
574 results_count: 0,
575 execution_time: Duration::ZERO,
576 memory_used: 0,
577 },
578 })
579 }))
580 }
581}
582
583fn match_triple_fast(
585 triple: &Triple,
586 pattern: &crate::model::pattern::TriplePattern,
587 bindings: &HashMap<Variable, Term>,
588) -> Option<HashMap<Variable, Term>> {
589 let mut result = bindings.clone();
590
591 if let Some(ref subject_pattern) = pattern.subject {
593 use crate::model::pattern::SubjectPattern;
594 match subject_pattern {
595 SubjectPattern::Variable(v) => {
596 if let Some(bound) = bindings.get(v) {
597 if bound != &Term::from_subject(triple.subject()) {
598 return None;
599 }
600 } else {
601 result.insert(v.clone(), Term::from_subject(triple.subject()));
602 }
603 }
604 SubjectPattern::NamedNode(n) => {
605 if let Subject::NamedNode(nn) = triple.subject() {
606 if nn != n {
607 return None;
608 }
609 } else {
610 return None;
611 }
612 }
613 SubjectPattern::BlankNode(b) => {
614 if let Subject::BlankNode(bn) = triple.subject() {
615 if bn != b {
616 return None;
617 }
618 } else {
619 return None;
620 }
621 }
622 }
623 }
624
625 Some(result)
628}
629
630impl CompiledQueryCache {
631 fn new() -> Self {
632 Self {
633 queries: HashMap::new(),
634 total_size: 0,
635 access_order: Vec::new(),
636 }
637 }
638
639 fn add(&mut self, hash: QueryHash, query: CompiledQuery) {
640 self.total_size += query.code_size;
641 self.queries.insert(hash, query);
642 self.access_order.push(hash);
643
644 while self.total_size > 100 * 1024 * 1024 {
646 if let Some(oldest) = self.access_order.first() {
648 if let Some(removed) = self.queries.remove(oldest) {
649 self.total_size -= removed.code_size;
650 }
651 self.access_order.remove(0);
652 } else {
653 break;
654 }
655 }
656 }
657}
658
659impl ExecutionStatistics {
660 fn new(hot_threshold: usize) -> Self {
661 Self {
662 query_counts: HashMap::new(),
663 query_times: HashMap::new(),
664 hot_threshold,
665 }
666 }
667}
668
669impl Default for JitConfig {
670 fn default() -> Self {
671 Self {
672 compilation_threshold: 100,
673 max_cache_size: 100 * 1024 * 1024, aggressive_opts: true,
675 target_features: TargetFeatures {
676 avx2: cfg!(target_feature = "avx2"),
677 avx512: cfg!(target_feature = "avx512f"),
678 bmi2: cfg!(target_feature = "bmi2"),
679 vectorize: true,
680 },
681 }
682 }
683}
684
685pub mod codegen {
687 use super::*;
688
689 pub struct LlvmCodeGen {
691 target: TargetConfig,
693 }
694
695 pub struct TargetConfig {
697 pub arch: String,
699 pub features: String,
701 pub opt_level: OptLevel,
703 }
704
705 pub enum OptLevel {
707 None,
708 Less,
709 Default,
710 Aggressive,
711 }
712
713 impl LlvmCodeGen {
714 pub fn gen_triple_scan(&self, _pattern: &TriplePattern) -> Vec<u8> {
716 vec![0x90] }
719
720 pub fn gen_vector_compare(&self) -> Vec<u8> {
722 vec![0x90] }
725 }
726}
727
#[cfg(test)]
mod tests {
    use super::*;
    use crate::model::pattern::{ObjectPattern, PredicatePattern, SubjectPattern};

    /// A freshly created compiler has no recorded executions.
    #[test]
    fn test_jit_compiler_creation() {
        let compiler = JitCompiler::new(JitConfig::default());

        let stats = compiler.execution_stats.read().unwrap();
        assert_eq!(stats.query_counts.len(), 0);
    }

    /// Hashing the same plan twice yields the same hash.
    #[test]
    fn test_query_hashing() {
        let compiler = JitCompiler::new(JitConfig::default());

        let subject = SubjectPattern::Variable(Variable::new("?s").unwrap());
        let predicate = PredicatePattern::Variable(Variable::new("?p").unwrap());
        let object = ObjectPattern::Variable(Variable::new("?o").unwrap());

        let plan = ExecutionPlan::TripleScan {
            pattern: TriplePattern::new(Some(subject), Some(predicate), Some(object)),
        };

        let first = compiler.hash_plan(&plan);
        let second = compiler.hash_plan(&plan);

        assert_eq!(first, second);
    }
}