Skip to main content

shape_runtime/
code_search.rs

1//! Structural code search over content-addressed function blobs.
2//!
3//! Enables querying functions by signature, dependency patterns,
4//! instruction patterns, and structural properties.
5
6use std::collections::{HashMap, HashSet};
7
8/// A searchable index over a set of function blobs.
9pub struct CodeIndex {
10    /// All indexed functions by hash.
11    functions: HashMap<[u8; 32], IndexedFunction>,
12    /// Functions indexed by arity.
13    by_arity: HashMap<u16, Vec<[u8; 32]>>,
14    /// Functions indexed by callee (dependency).
15    by_callee: HashMap<[u8; 32], Vec<[u8; 32]>>,
16    /// Functions indexed by referenced type schema.
17    by_type_schema: HashMap<String, Vec<[u8; 32]>>,
18    /// Functions indexed by name.
19    by_name: HashMap<String, [u8; 32]>,
20}
21
22/// Indexed metadata about a function.
23#[derive(Debug, Clone)]
24pub struct IndexedFunction {
25    pub hash: [u8; 32],
26    pub name: String,
27    pub arity: u16,
28    pub instruction_count: usize,
29    pub dependencies: Vec<[u8; 32]>,
30    pub type_schemas: Vec<String>,
31    pub is_async: bool,
32    pub is_closure: bool,
33    pub has_captures: bool,
34}
35
36/// Query for searching functions.
37#[derive(Debug, Clone, Default)]
38pub struct FunctionQuery {
39    pub name_pattern: Option<String>,
40    pub arity: Option<u16>,
41    pub min_instructions: Option<usize>,
42    pub max_instructions: Option<usize>,
43    pub calls_function: Option<[u8; 32]>,
44    pub uses_type: Option<String>,
45    pub is_async: Option<bool>,
46    pub is_closure: Option<bool>,
47}
48
49/// Result of a code search query.
50#[derive(Debug, Clone)]
51pub struct SearchResult {
52    pub matches: Vec<IndexedFunction>,
53    pub total_indexed: usize,
54}
55
56impl CodeIndex {
57    /// Create a new empty index.
58    pub fn new() -> Self {
59        Self {
60            functions: HashMap::new(),
61            by_arity: HashMap::new(),
62            by_callee: HashMap::new(),
63            by_type_schema: HashMap::new(),
64            by_name: HashMap::new(),
65        }
66    }
67
68    /// Add a function to the index.
69    #[allow(clippy::too_many_arguments)]
70    pub fn index_function(
71        &mut self,
72        hash: [u8; 32],
73        name: String,
74        arity: u16,
75        instruction_count: usize,
76        dependencies: Vec<[u8; 32]>,
77        type_schemas: Vec<String>,
78        is_async: bool,
79        is_closure: bool,
80        captures_count: u16,
81    ) {
82        let func = IndexedFunction {
83            hash,
84            name: name.clone(),
85            arity,
86            instruction_count,
87            dependencies: dependencies.clone(),
88            type_schemas: type_schemas.clone(),
89            is_async,
90            is_closure,
91            has_captures: captures_count > 0,
92        };
93
94        // Index by arity.
95        self.by_arity.entry(arity).or_default().push(hash);
96
97        // Index by callee (each dependency is a function this blob calls).
98        for dep in &dependencies {
99            self.by_callee.entry(*dep).or_default().push(hash);
100        }
101
102        // Index by type schema.
103        for schema in &type_schemas {
104            self.by_type_schema
105                .entry(schema.clone())
106                .or_default()
107                .push(hash);
108        }
109
110        // Index by name.
111        self.by_name.insert(name, hash);
112
113        // Store the function.
114        self.functions.insert(hash, func);
115    }
116
117    /// Execute a query against the index, returning all functions that match
118    /// every specified criterion.
119    pub fn search(&self, query: &FunctionQuery) -> SearchResult {
120        let total_indexed = self.functions.len();
121
122        // Start with candidate sets from indexed lookups, then intersect.
123        let mut candidates: Option<HashSet<[u8; 32]>> = None;
124
125        // Narrow by arity if specified.
126        if let Some(arity) = query.arity {
127            let set: HashSet<[u8; 32]> = self
128                .by_arity
129                .get(&arity)
130                .map(|v| v.iter().copied().collect())
131                .unwrap_or_default();
132            candidates = Some(match candidates {
133                Some(c) => c.intersection(&set).copied().collect(),
134                None => set,
135            });
136        }
137
138        // Narrow by callee if specified.
139        if let Some(ref callee) = query.calls_function {
140            let set: HashSet<[u8; 32]> = self
141                .by_callee
142                .get(callee)
143                .map(|v| v.iter().copied().collect())
144                .unwrap_or_default();
145            candidates = Some(match candidates {
146                Some(c) => c.intersection(&set).copied().collect(),
147                None => set,
148            });
149        }
150
151        // Narrow by type schema if specified.
152        if let Some(ref type_name) = query.uses_type {
153            let set: HashSet<[u8; 32]> = self
154                .by_type_schema
155                .get(type_name)
156                .map(|v| v.iter().copied().collect())
157                .unwrap_or_default();
158            candidates = Some(match candidates {
159                Some(c) => c.intersection(&set).copied().collect(),
160                None => set,
161            });
162        }
163
164        // If no indexed filter was applied, start with all functions.
165        let candidate_iter: Box<dyn Iterator<Item = &IndexedFunction>> = match candidates {
166            Some(ref set) => Box::new(set.iter().filter_map(|h| self.functions.get(h))),
167            None => Box::new(self.functions.values()),
168        };
169
170        // Apply remaining filters that require scanning.
171        let matches: Vec<IndexedFunction> = candidate_iter
172            .filter(|f| {
173                if let Some(ref pattern) = query.name_pattern {
174                    if !f.name.contains(pattern.as_str()) {
175                        return false;
176                    }
177                }
178                if let Some(min) = query.min_instructions {
179                    if f.instruction_count < min {
180                        return false;
181                    }
182                }
183                if let Some(max) = query.max_instructions {
184                    if f.instruction_count > max {
185                        return false;
186                    }
187                }
188                if let Some(want_async) = query.is_async {
189                    if f.is_async != want_async {
190                        return false;
191                    }
192                }
193                if let Some(want_closure) = query.is_closure {
194                    if f.is_closure != want_closure {
195                        return false;
196                    }
197                }
198                true
199            })
200            .cloned()
201            .collect();
202
203        SearchResult {
204            matches,
205            total_indexed,
206        }
207    }
208
209    /// Find functions that call the given function hash (i.e. have it as a dependency).
210    pub fn find_callers(&self, function_hash: [u8; 32]) -> Vec<IndexedFunction> {
211        self.by_callee
212            .get(&function_hash)
213            .map(|hashes| {
214                hashes
215                    .iter()
216                    .filter_map(|h| self.functions.get(h).cloned())
217                    .collect()
218            })
219            .unwrap_or_default()
220    }
221
222    /// Find functions called by the given function hash (its direct dependencies).
223    pub fn find_callees(&self, function_hash: [u8; 32]) -> Vec<IndexedFunction> {
224        self.functions
225            .get(&function_hash)
226            .map(|f| {
227                f.dependencies
228                    .iter()
229                    .filter_map(|h| self.functions.get(h).cloned())
230                    .collect()
231            })
232            .unwrap_or_default()
233    }
234
235    /// Compute the transitive dependency depth for a function.
236    ///
237    /// Returns `None` if the function is not in the index.
238    /// A function with no dependencies has depth 0.
239    /// A function that calls only leaf functions has depth 1, etc.
240    ///
241    /// Cycles are detected and treated as already-visited (depth 0 contribution).
242    pub fn dependency_depth(&self, function_hash: [u8; 32]) -> Option<usize> {
243        let func = self.functions.get(&function_hash)?;
244        if func.dependencies.is_empty() {
245            return Some(0);
246        }
247
248        // BFS with memoization.
249        let mut memo: HashMap<[u8; 32], usize> = HashMap::new();
250        Some(self.compute_depth(function_hash, &mut memo, &mut HashSet::new()))
251    }
252
253    /// Recursive depth computation with cycle detection.
254    fn compute_depth(
255        &self,
256        hash: [u8; 32],
257        memo: &mut HashMap<[u8; 32], usize>,
258        visiting: &mut HashSet<[u8; 32]>,
259    ) -> usize {
260        if let Some(&cached) = memo.get(&hash) {
261            return cached;
262        }
263        if visiting.contains(&hash) {
264            // Cycle detected; treat as depth 0 to break the loop.
265            return 0;
266        }
267
268        let deps = match self.functions.get(&hash) {
269            Some(f) => &f.dependencies,
270            None => {
271                memo.insert(hash, 0);
272                return 0;
273            }
274        };
275
276        if deps.is_empty() {
277            memo.insert(hash, 0);
278            return 0;
279        }
280
281        visiting.insert(hash);
282        let max_child = deps
283            .iter()
284            .map(|d| self.compute_depth(*d, memo, visiting))
285            .max()
286            .unwrap_or(0);
287        visiting.remove(&hash);
288
289        let depth = max_child + 1;
290        memo.insert(hash, depth);
291        depth
292    }
293
294    /// Return the total number of indexed functions.
295    pub fn len(&self) -> usize {
296        self.functions.len()
297    }
298
299    /// Return whether the index is empty.
300    pub fn is_empty(&self) -> bool {
301        self.functions.is_empty()
302    }
303}
304
305impl Default for CodeIndex {
306    fn default() -> Self {
307        Self::new()
308    }
309}
310
311// ── FunctionQuery builder methods ──────────────────────────────────────────
312
313impl FunctionQuery {
314    /// Create a new empty query.
315    pub fn new() -> Self {
316        Self::default()
317    }
318
319    /// Filter by name substring.
320    pub fn with_name_pattern(mut self, pattern: impl Into<String>) -> Self {
321        self.name_pattern = Some(pattern.into());
322        self
323    }
324
325    /// Filter by exact arity.
326    pub fn with_arity(mut self, arity: u16) -> Self {
327        self.arity = Some(arity);
328        self
329    }
330
331    /// Filter by minimum instruction count.
332    pub fn with_min_instructions(mut self, min: usize) -> Self {
333        self.min_instructions = Some(min);
334        self
335    }
336
337    /// Filter by maximum instruction count.
338    pub fn with_max_instructions(mut self, max: usize) -> Self {
339        self.max_instructions = Some(max);
340        self
341    }
342
343    /// Filter to functions that call a specific function hash.
344    pub fn with_calls_function(mut self, hash: [u8; 32]) -> Self {
345        self.calls_function = Some(hash);
346        self
347    }
348
349    /// Filter to functions that reference a type schema.
350    pub fn with_uses_type(mut self, type_name: impl Into<String>) -> Self {
351        self.uses_type = Some(type_name.into());
352        self
353    }
354
355    /// Filter by async status.
356    pub fn with_async(mut self, is_async: bool) -> Self {
357        self.is_async = Some(is_async);
358        self
359    }
360
361    /// Filter by closure status.
362    pub fn with_closure(mut self, is_closure: bool) -> Self {
363        self.is_closure = Some(is_closure);
364        self
365    }
366}
367
368#[cfg(test)]
369mod tests {
370    use super::*;
371
372    fn make_hash(seed: u8) -> [u8; 32] {
373        let mut h = [0u8; 32];
374        h[0] = seed;
375        h
376    }
377
378    fn build_test_index() -> CodeIndex {
379        let mut idx = CodeIndex::new();
380
381        // Leaf function: add(a, b) - sync, not a closure
382        idx.index_function(
383            make_hash(1),
384            "add".into(),
385            2,
386            5,
387            vec![],
388            vec![],
389            false,
390            false,
391            0,
392        );
393
394        // Leaf function: mul(a, b) - sync
395        idx.index_function(
396            make_hash(2),
397            "mul".into(),
398            2,
399            4,
400            vec![],
401            vec!["Number".into()],
402            false,
403            false,
404            0,
405        );
406
407        // Calls add: sum_and_mul(a, b, c) - sync
408        idx.index_function(
409            make_hash(3),
410            "sum_and_mul".into(),
411            3,
412            12,
413            vec![make_hash(1), make_hash(2)],
414            vec!["Number".into()],
415            false,
416            false,
417            0,
418        );
419
420        // Async closure that captures: fetch_data() - async, closure
421        idx.index_function(
422            make_hash(4),
423            "fetch_data".into(),
424            0,
425            20,
426            vec![make_hash(3)],
427            vec!["DataRow".into()],
428            true,
429            true,
430            2,
431        );
432
433        // Deep chain: orchestrate() calls fetch_data
434        idx.index_function(
435            make_hash(5),
436            "orchestrate".into(),
437            1,
438            30,
439            vec![make_hash(4)],
440            vec![],
441            true,
442            false,
443            0,
444        );
445
446        idx
447    }
448
449    #[test]
450    fn test_new_index_is_empty() {
451        let idx = CodeIndex::new();
452        assert!(idx.is_empty());
453        assert_eq!(idx.len(), 0);
454    }
455
456    #[test]
457    fn test_index_function_and_len() {
458        let idx = build_test_index();
459        assert_eq!(idx.len(), 5);
460        assert!(!idx.is_empty());
461    }
462
463    #[test]
464    fn test_search_by_arity() {
465        let idx = build_test_index();
466        let result = idx.search(&FunctionQuery::new().with_arity(2));
467        assert_eq!(result.matches.len(), 2);
468        let names: HashSet<&str> = result.matches.iter().map(|f| f.name.as_str()).collect();
469        assert!(names.contains("add"));
470        assert!(names.contains("mul"));
471        assert_eq!(result.total_indexed, 5);
472    }
473
474    #[test]
475    fn test_search_by_name_pattern() {
476        let idx = build_test_index();
477        let result = idx.search(&FunctionQuery::new().with_name_pattern("mul"));
478        assert_eq!(result.matches.len(), 2); // "mul" and "sum_and_mul"
479    }
480
481    #[test]
482    fn test_search_by_async() {
483        let idx = build_test_index();
484        let result = idx.search(&FunctionQuery::new().with_async(true));
485        assert_eq!(result.matches.len(), 2);
486        let names: HashSet<&str> = result.matches.iter().map(|f| f.name.as_str()).collect();
487        assert!(names.contains("fetch_data"));
488        assert!(names.contains("orchestrate"));
489    }
490
491    #[test]
492    fn test_search_by_closure() {
493        let idx = build_test_index();
494        let result = idx.search(&FunctionQuery::new().with_closure(true));
495        assert_eq!(result.matches.len(), 1);
496        assert_eq!(result.matches[0].name, "fetch_data");
497        assert!(result.matches[0].has_captures);
498    }
499
500    #[test]
501    fn test_search_by_calls_function() {
502        let idx = build_test_index();
503        let result = idx.search(&FunctionQuery::new().with_calls_function(make_hash(1)));
504        assert_eq!(result.matches.len(), 1);
505        assert_eq!(result.matches[0].name, "sum_and_mul");
506    }
507
508    #[test]
509    fn test_search_by_uses_type() {
510        let idx = build_test_index();
511        let result = idx.search(&FunctionQuery::new().with_uses_type("Number"));
512        assert_eq!(result.matches.len(), 2);
513        let names: HashSet<&str> = result.matches.iter().map(|f| f.name.as_str()).collect();
514        assert!(names.contains("mul"));
515        assert!(names.contains("sum_and_mul"));
516    }
517
518    #[test]
519    fn test_search_combined_filters() {
520        let idx = build_test_index();
521        let result = idx.search(&FunctionQuery::new().with_arity(2).with_uses_type("Number"));
522        assert_eq!(result.matches.len(), 1);
523        assert_eq!(result.matches[0].name, "mul");
524    }
525
526    #[test]
527    fn test_search_instruction_range() {
528        let idx = build_test_index();
529        let result = idx.search(
530            &FunctionQuery::new()
531                .with_min_instructions(10)
532                .with_max_instructions(25),
533        );
534        assert_eq!(result.matches.len(), 2);
535        let names: HashSet<&str> = result.matches.iter().map(|f| f.name.as_str()).collect();
536        assert!(names.contains("sum_and_mul"));
537        assert!(names.contains("fetch_data"));
538    }
539
540    #[test]
541    fn test_search_no_matches() {
542        let idx = build_test_index();
543        let result = idx.search(&FunctionQuery::new().with_arity(99));
544        assert!(result.matches.is_empty());
545        assert_eq!(result.total_indexed, 5);
546    }
547
548    #[test]
549    fn test_find_callers() {
550        let idx = build_test_index();
551        // Who calls add (hash 1)?
552        let callers = idx.find_callers(make_hash(1));
553        assert_eq!(callers.len(), 1);
554        assert_eq!(callers[0].name, "sum_and_mul");
555
556        // Who calls sum_and_mul (hash 3)?
557        let callers = idx.find_callers(make_hash(3));
558        assert_eq!(callers.len(), 1);
559        assert_eq!(callers[0].name, "fetch_data");
560    }
561
562    #[test]
563    fn test_find_callers_none() {
564        let idx = build_test_index();
565        // orchestrate (hash 5) is not called by anyone in the index.
566        let callers = idx.find_callers(make_hash(5));
567        assert!(callers.is_empty());
568    }
569
570    #[test]
571    fn test_find_callees() {
572        let idx = build_test_index();
573        // sum_and_mul calls add and mul.
574        let callees = idx.find_callees(make_hash(3));
575        assert_eq!(callees.len(), 2);
576        let names: HashSet<&str> = callees.iter().map(|f| f.name.as_str()).collect();
577        assert!(names.contains("add"));
578        assert!(names.contains("mul"));
579    }
580
581    #[test]
582    fn test_find_callees_leaf() {
583        let idx = build_test_index();
584        let callees = idx.find_callees(make_hash(1));
585        assert!(callees.is_empty());
586    }
587
588    #[test]
589    fn test_dependency_depth_leaf() {
590        let idx = build_test_index();
591        assert_eq!(idx.dependency_depth(make_hash(1)), Some(0));
592        assert_eq!(idx.dependency_depth(make_hash(2)), Some(0));
593    }
594
595    #[test]
596    fn test_dependency_depth_one_level() {
597        let idx = build_test_index();
598        // sum_and_mul -> {add, mul}, both depth 0 => depth 1
599        assert_eq!(idx.dependency_depth(make_hash(3)), Some(1));
600    }
601
602    #[test]
603    fn test_dependency_depth_two_levels() {
604        let idx = build_test_index();
605        // fetch_data -> sum_and_mul (depth 1) => depth 2
606        assert_eq!(idx.dependency_depth(make_hash(4)), Some(2));
607    }
608
609    #[test]
610    fn test_dependency_depth_three_levels() {
611        let idx = build_test_index();
612        // orchestrate -> fetch_data (depth 2) => depth 3
613        assert_eq!(idx.dependency_depth(make_hash(5)), Some(3));
614    }
615
616    #[test]
617    fn test_dependency_depth_unknown_hash() {
618        let idx = build_test_index();
619        assert_eq!(idx.dependency_depth(make_hash(99)), None);
620    }
621
622    #[test]
623    fn test_dependency_depth_cycle() {
624        let mut idx = CodeIndex::new();
625        // a -> b -> a (cycle)
626        idx.index_function(
627            make_hash(10),
628            "a".into(),
629            0,
630            1,
631            vec![make_hash(11)],
632            vec![],
633            false,
634            false,
635            0,
636        );
637        idx.index_function(
638            make_hash(11),
639            "b".into(),
640            0,
641            1,
642            vec![make_hash(10)],
643            vec![],
644            false,
645            false,
646            0,
647        );
648        // Should not hang; cycle breaks to 0.
649        let depth = idx.dependency_depth(make_hash(10));
650        assert!(depth.is_some());
651        assert!(depth.unwrap() <= 2);
652    }
653
654    #[test]
655    fn test_function_query_builder_chain() {
656        let q = FunctionQuery::new()
657            .with_name_pattern("foo")
658            .with_arity(3)
659            .with_min_instructions(5)
660            .with_max_instructions(100)
661            .with_async(true)
662            .with_closure(false)
663            .with_uses_type("Bar");
664
665        assert_eq!(q.name_pattern.as_deref(), Some("foo"));
666        assert_eq!(q.arity, Some(3));
667        assert_eq!(q.min_instructions, Some(5));
668        assert_eq!(q.max_instructions, Some(100));
669        assert_eq!(q.is_async, Some(true));
670        assert_eq!(q.is_closure, Some(false));
671        assert_eq!(q.uses_type.as_deref(), Some("Bar"));
672    }
673}