Skip to main content

oak_semantic_search/
lib.rs

1#![feature(new_range_api)]
2use oak_core::{
3    errors::OakError,
4    language::{ElementRole, ElementType, Language, UniversalElementRole},
5    tree::{RedNode, red_tree::RedLeaf},
6    visitor::Visitor,
7};
8use serde::{Deserialize, Serialize};
9
10pub trait SemanticSearch: Send + Sync {
11    fn search(&self, query: &str, limit: usize) -> impl std::future::Future<Output = Result<Vec<String>, OakError>> + Send;
12}
13
/// A default implementation of `SemanticSearch` that does nothing.
///
/// It is a stateless unit type, so it is cheap to copy and can be created
/// through `Default`.
#[derive(Debug, Clone, Copy, Default)]
pub struct NoSemanticSearch;
16
17impl SemanticSearch for NoSemanticSearch {
18    fn search(&self, _query: &str, _limit: usize) -> impl std::future::Future<Output = Result<Vec<String>, OakError>> + Send {
19        async { Err(OakError::semantic_error("Semantic search is not enabled on this server")) }
20    }
21}
22
23#[derive(Debug, Serialize, Deserialize)]
24pub struct CodeChunk {
25    pub text: String,
26    pub range_start: usize,
27    pub range_end: usize,
28    pub role: String,
29}
30
/// Placeholder semantic searcher.
///
/// It currently holds no state; it only exposes AST-based chunking and an
/// always-empty search.
#[derive(Debug)]
pub struct SemanticSearcher {}
32
33struct ChunkCollector<'a, L: Language> {
34    source: &'a str,
35    chunks: Vec<CodeChunk>,
36    _phantom: std::marker::PhantomData<L>,
37}
38
39impl<'a, L: Language> ChunkCollector<'a, L> {
40    fn new(source: &'a str) -> Self {
41        Self { source, chunks: Vec::new(), _phantom: std::marker::PhantomData }
42    }
43}
44
45impl<'a, L: Language> Visitor<'a, L> for ChunkCollector<'a, L> {
46    fn visit_node(&mut self, node: RedNode<'a, L>) {
47        let role = node.green.kind.role().universal();
48
49        // Chunking strategy: treat Definitions and Statements as potential chunks
50        match role {
51            UniversalElementRole::Definition | UniversalElementRole::Statement | UniversalElementRole::Documentation => {
52                let range = node.span();
53                let text = self.source[range.start..range.end].to_string();
54
55                // Only index chunks that are meaningful in length
56                if text.len() > 20 {
57                    self.chunks.push(CodeChunk { text, range_start: range.start, range_end: range.end, role: format!("{:?}", role) });
58                }
59            }
60            _ => {}
61        }
62        self.walk_node(node);
63    }
64
65    fn visit_token(&mut self, _token: RedLeaf<L>) {}
66}
67
68impl SemanticSearcher {
69    pub async fn new(_db_path: &str) -> Result<Self, OakError> {
70        Ok(Self {})
71    }
72
73    /// Chunks code using oak-core's AST and indexes it
74    pub async fn index_code<'a, L: Language>(&self, root: RedNode<'a, L>, source: &'a str, _table_name: &str) -> Result<(), OakError> {
75        let mut collector = ChunkCollector::<L>::new(source);
76        collector.visit_node(root);
77
78        if collector.chunks.is_empty() {
79            return Ok(());
80        }
81
82        // Concrete implementation (embedding and vector storage) has been removed.
83        // This is now a placeholder that only performs AST-based chunking.
84
85        Ok(())
86    }
87}
88
89impl SemanticSearch for SemanticSearcher {
90    async fn search(&self, _query: &str, _limit: usize) -> Result<Vec<String>, OakError> {
91        // Concrete implementation (query embedding and vector search) has been removed.
92        Ok(vec![])
93    }
94}