Skip to main content

st/
tree_sitter_quantum.rs

//! Tree-sitter based quantum compression - "Semantic awareness meets compression!" - Omni
//! Uses AST parsing to extract only the most meaningful code structures
3
4use anyhow::Result;
5use std::collections::HashMap;
6
7// For now, we'll create a trait that can be implemented with tree-sitter later
8pub trait LanguageQuantumParser {
9    /// Extract semantically important nodes from source code
10    fn extract_quantum_nodes(&self, source: &str) -> Result<Vec<QuantumNode>>;
11
12    /// Score the importance of a node (0.0 to 1.0)
13    fn score_importance(&self, node: &QuantumNode) -> f32;
14}
15
16#[derive(Debug, Clone)]
17pub struct QuantumNode {
18    pub kind: NodeKind,
19    pub name: String,
20    pub content: String,
21    pub byte_range: (usize, usize),
22    pub importance: f32,
23}
24
/// Category of a [`QuantumNode`].
///
/// The enum is fieldless, so it is `Copy` and can be used as a hash-map or
/// hash-set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NodeKind {
    Function,
    /// Also used for Python classes.
    Struct,
    Enum,
    Trait,
    Module,
    Import,
    Constant,
    Type,
    Test,
    Comment,
}
38
39/// Rust language quantum parser
40pub struct RustQuantumParser;
41
42impl Default for RustQuantumParser {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48impl RustQuantumParser {
49    pub fn new() -> Self {
50        Self
51    }
52
53    /// Simplified version without tree-sitter dependency for now
54    /// This demonstrates the concept until we add tree-sitter
55    pub fn summarize_rust_code(&self, source_code: &str) -> Vec<String> {
56        let mut highlights = vec![];
57
58        // Simple regex-based extraction for now
59        // TODO: Replace with tree-sitter AST parsing
60
61        // Extract function signatures
62        let fn_regex =
63            regex::Regex::new(r"(?m)^[\s]*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)[^{]+").unwrap();
64        for cap in fn_regex.captures_iter(source_code) {
65            if let Some(sig) = cap.get(0) {
66                let sig_str = sig.as_str().trim();
67                // Take only the signature, not the body
68                if let Some(paren_end) = sig_str.rfind(')') {
69                    let end = sig_str[paren_end..]
70                        .find('{')
71                        .map(|i| paren_end + i)
72                        .unwrap_or(sig_str.len());
73                    highlights.push(format!("fn: {}", sig_str[..end].trim()));
74                }
75            }
76        }
77
78        // Extract struct definitions
79        let struct_regex = regex::Regex::new(r"(?m)^[\s]*(?:pub\s+)?struct\s+(\w+)").unwrap();
80        for cap in struct_regex.captures_iter(source_code) {
81            if let Some(name) = cap.get(1) {
82                highlights.push(format!("struct: {}", name.as_str()));
83            }
84        }
85
86        // Extract trait definitions
87        let trait_regex = regex::Regex::new(r"(?m)^[\s]*(?:pub\s+)?trait\s+(\w+)").unwrap();
88        for cap in trait_regex.captures_iter(source_code) {
89            if let Some(name) = cap.get(1) {
90                highlights.push(format!("trait: {}", name.as_str()));
91            }
92        }
93
94        // Extract module definitions
95        let mod_regex = regex::Regex::new(r"(?m)^[\s]*(?:pub\s+)?mod\s+(\w+)").unwrap();
96        for cap in mod_regex.captures_iter(source_code) {
97            if let Some(name) = cap.get(1) {
98                highlights.push(format!("mod: {}", name.as_str()));
99            }
100        }
101
102        highlights
103    }
104}
105
106impl LanguageQuantumParser for RustQuantumParser {
107    fn extract_quantum_nodes(&self, source: &str) -> Result<Vec<QuantumNode>> {
108        let mut nodes = Vec::new();
109
110        // Function extraction with importance scoring
111        let fn_regex =
112            regex::Regex::new(r"(?m)^[\s]*(?:pub\s+)?(?:async\s+)?fn\s+(\w+)[^{]+").unwrap();
113        for cap in fn_regex.captures_iter(source) {
114            if let (Some(full_match), Some(name)) = (cap.get(0), cap.get(1)) {
115                let importance = if full_match.as_str().contains("pub") {
116                    0.9
117                } else if name.as_str() == "main" {
118                    1.0
119                } else if name.as_str().starts_with("test_") {
120                    0.3
121                } else {
122                    0.6
123                };
124
125                nodes.push(QuantumNode {
126                    kind: NodeKind::Function,
127                    name: name.as_str().to_string(),
128                    content: full_match.as_str().to_string(),
129                    byte_range: (full_match.start(), full_match.end()),
130                    importance,
131                });
132            }
133        }
134
135        // Sort by importance
136        nodes.sort_by(|a, b| b.importance.partial_cmp(&a.importance).unwrap());
137
138        Ok(nodes)
139    }
140
141    fn score_importance(&self, node: &QuantumNode) -> f32 {
142        node.importance
143    }
144}
145
/// Python language quantum parser
#[derive(Debug, Clone, Copy)]
pub struct PythonQuantumParser;

impl Default for PythonQuantumParser {
    fn default() -> Self {
        Self::new()
    }
}

impl PythonQuantumParser {
    /// Creates a parser; the type carries no state.
    pub fn new() -> Self {
        Self
    }
}
160
161impl LanguageQuantumParser for PythonQuantumParser {
162    fn extract_quantum_nodes(&self, source: &str) -> Result<Vec<QuantumNode>> {
163        let mut nodes = Vec::new();
164
165        // Class extraction
166        let class_regex = regex::Regex::new(r"(?m)^class\s+(\w+)").unwrap();
167        for cap in class_regex.captures_iter(source) {
168            if let (Some(full_match), Some(name)) = (cap.get(0), cap.get(1)) {
169                nodes.push(QuantumNode {
170                    kind: NodeKind::Struct, // Using Struct for classes
171                    name: name.as_str().to_string(),
172                    content: full_match.as_str().to_string(),
173                    byte_range: (full_match.start(), full_match.end()),
174                    importance: 0.8,
175                });
176            }
177        }
178
179        // Function extraction
180        let fn_regex = regex::Regex::new(r"(?m)^def\s+(\w+)").unwrap();
181        for cap in fn_regex.captures_iter(source) {
182            if let (Some(full_match), Some(name)) = (cap.get(0), cap.get(1)) {
183                let importance = if name.as_str() == "__init__" {
184                    0.9
185                } else if name.as_str().starts_with("_") {
186                    0.4
187                } else if name.as_str() == "main" {
188                    1.0
189                } else {
190                    0.6
191                };
192
193                nodes.push(QuantumNode {
194                    kind: NodeKind::Function,
195                    name: name.as_str().to_string(),
196                    content: full_match.as_str().to_string(),
197                    byte_range: (full_match.start(), full_match.end()),
198                    importance,
199                });
200            }
201        }
202
203        nodes.sort_by(|a, b| b.importance.partial_cmp(&a.importance).unwrap());
204        Ok(nodes)
205    }
206
207    fn score_importance(&self, node: &QuantumNode) -> f32 {
208        node.importance
209    }
210}
211
212/// Factory for creating language-specific quantum parsers
213pub struct QuantumParserFactory;
214
215impl QuantumParserFactory {
216    pub fn create_parser(language: &str) -> Option<Box<dyn LanguageQuantumParser>> {
217        match language.to_lowercase().as_str() {
218            "rust" | "rs" => Some(Box::new(RustQuantumParser::new())),
219            "python" | "py" => Some(Box::new(PythonQuantumParser::new())),
220            _ => None,
221        }
222    }
223}
224
225/// Quantum compression that uses semantic analysis
226pub struct SemanticQuantumCompressor {
227    parsers: HashMap<String, Box<dyn LanguageQuantumParser>>,
228}
229
230impl Default for SemanticQuantumCompressor {
231    fn default() -> Self {
232        Self::new()
233    }
234}
235
236impl SemanticQuantumCompressor {
237    pub fn new() -> Self {
238        let mut parsers = HashMap::new();
239
240        // Pre-register parsers
241        parsers.insert(
242            "rust".to_string(),
243            Box::new(RustQuantumParser::new()) as Box<dyn LanguageQuantumParser>,
244        );
245        parsers.insert(
246            "python".to_string(),
247            Box::new(PythonQuantumParser::new()) as Box<dyn LanguageQuantumParser>,
248        );
249
250        Self { parsers }
251    }
252
253    /// Compress source code using semantic understanding
254    pub fn compress_semantic(
255        &self,
256        source: &str,
257        language: &str,
258        max_nodes: usize,
259    ) -> Result<String> {
260        let parser = self
261            .parsers
262            .get(language)
263            .ok_or_else(|| anyhow::anyhow!("Unsupported language: {}", language))?;
264
265        let nodes = parser.extract_quantum_nodes(source)?;
266
267        // Take only the most important nodes up to max_nodes
268        let important_nodes: Vec<_> = nodes.into_iter().take(max_nodes).collect();
269
270        // Build compressed representation
271        let mut output = format!("QUANTUM_SEMANTIC_V1:lang={}\n", language);
272
273        for node in important_nodes {
274            output.push_str(&format!(
275                "{:?}:{} [{:.2}]\n",
276                node.kind, node.name, node.importance
277            ));
278        }
279
280        Ok(output)
281    }
282}
283
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the importance ranking produced for a small Rust sample:
    /// `main` first, the public constructor scored high, the test function low.
    #[test]
    fn test_rust_quantum_parser() {
        let source = r#"
pub struct Scanner {
    root: PathBuf,
}

impl Scanner {
    pub fn new(path: &Path) -> Result<Self> {
        Ok(Self { root: path.to_path_buf() })
    }
    
    fn internal_method(&self) -> bool {
        true
    }
}

fn main() {
    println!("Hello!");
}

#[test]
fn test_scanner() {
    // test
}
"#;

        let nodes = RustQuantumParser::new()
            .extract_quantum_nodes(source)
            .unwrap();

        // Should prioritize main > pub fn > private fn > test
        assert_eq!(nodes[0].name, "main");

        let has_public_ctor = nodes
            .iter()
            .any(|node| node.name == "new" && node.importance > 0.8);
        assert!(has_public_ctor);

        let has_low_ranked_test = nodes
            .iter()
            .any(|node| node.name == "test_scanner" && node.importance < 0.5);
        assert!(has_low_ranked_test);
    }
}
325}