Skip to main content

qex_core/chunk/
mod.rs

1pub mod languages;
2pub mod multi_language;
3pub mod tree_sitter;
4
5use serde::{Deserialize, Serialize};
6use std::fmt;
7
/// Represents the type of a code chunk.
///
/// With `rename_all = "snake_case"`, serde reads and writes each variant under
/// its snake_case name (for example, `ImportBlock` as `"import_block"`).
///
/// NOTE(review): the per-variant notes below are inferred from the variant
/// names only; which source constructs map to which variant is decided by the
/// chunkers, which are not visible here — confirm against them.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ChunkType {
    /// A function.
    Function,
    /// A method.
    Method,
    /// A class.
    Class,
    /// A struct.
    Struct,
    /// An enum.
    Enum,
    /// An interface.
    Interface,
    /// A trait.
    Trait,
    /// An `impl` block.
    Impl,
    /// A module.
    Module,
    /// Presumably code that sits at module level outside any named construct — TODO confirm.
    ModuleLevel,
    /// A block of import statements.
    ImportBlock,
    /// A namespace.
    Namespace,
    /// A macro.
    Macro,
    /// Presumably a section of a non-code document — TODO confirm.
    Section,
    /// Presumably a whole document — TODO confirm.
    Document,
}
28
29impl fmt::Display for ChunkType {
30    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
31        match self {
32            ChunkType::Function => write!(f, "function"),
33            ChunkType::Method => write!(f, "method"),
34            ChunkType::Class => write!(f, "class"),
35            ChunkType::Struct => write!(f, "struct"),
36            ChunkType::Enum => write!(f, "enum"),
37            ChunkType::Interface => write!(f, "interface"),
38            ChunkType::Trait => write!(f, "trait"),
39            ChunkType::Impl => write!(f, "impl"),
40            ChunkType::Module => write!(f, "module"),
41            ChunkType::ModuleLevel => write!(f, "module_level"),
42            ChunkType::ImportBlock => write!(f, "import_block"),
43            ChunkType::Namespace => write!(f, "namespace"),
44            ChunkType::Macro => write!(f, "macro"),
45            ChunkType::Section => write!(f, "section"),
46            ChunkType::Document => write!(f, "document"),
47        }
48    }
49}
50
/// A semantic chunk of code extracted from a source file.
///
/// The type derives `Serialize`/`Deserialize`, so the field names below are
/// also the keys of its serialized form.
///
/// NOTE(review): the associated functions `generate_id`,
/// `extract_folder_structure`, `compute_complexity`, and `extract_tags`
/// return values matching the `id`, `folder_structure`, `complexity_score`,
/// and `tags` fields; presumably chunks are populated through them — confirm
/// at the construction sites.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeChunk {
    /// Unique identifier for this chunk
    pub id: String,
    /// The actual source code content
    pub content: String,
    /// Type of code construct
    pub chunk_type: ChunkType,
    /// Starting line number (1-based)
    pub start_line: usize,
    /// Ending line number (1-based)
    pub end_line: usize,
    /// Absolute file path
    pub file_path: String,
    /// Relative file path from project root
    pub relative_path: String,
    /// Folder components of the path (e.g., ["src", "utils", "auth"])
    pub folder_structure: Vec<String>,
    /// Name of the construct (function/class/method name), if it has one
    pub name: Option<String>,
    /// Parent construct name (e.g., class name for methods), if any
    pub parent_name: Option<String>,
    /// Programming language
    pub language: String,
    /// Docstring/documentation comment, if present
    pub docstring: Option<String>,
    /// Decorator/attribute annotations
    pub decorators: Vec<String>,
    /// Import statements within this chunk
    pub imports: Vec<String>,
    /// Semantic tags for categorization
    pub tags: Vec<String>,
    /// Complexity indicator (rough metric)
    pub complexity_score: u32,
}
87
88impl CodeChunk {
89    /// Generate a unique chunk ID from file path, line range, and name
90    pub fn generate_id(file_path: &str, start_line: usize, end_line: usize, name: Option<&str>) -> String {
91        use sha2::{Digest, Sha256};
92        let input = format!("{}:{}:{}:{}", file_path, start_line, end_line, name.unwrap_or(""));
93        let hash = Sha256::digest(input.as_bytes());
94        format!("{:x}", hash)[..16].to_string()
95    }
96
97    /// Extract folder structure from a relative path
98    pub fn extract_folder_structure(relative_path: &str) -> Vec<String> {
99        let path = std::path::Path::new(relative_path);
100        path.parent()
101            .map(|p| {
102                p.components()
103                    .filter_map(|c| match c {
104                        std::path::Component::Normal(s) => s.to_str().map(String::from),
105                        _ => None,
106                    })
107                    .collect()
108            })
109            .unwrap_or_default()
110    }
111
112    /// Compute a rough complexity score based on content
113    pub fn compute_complexity(content: &str) -> u32 {
114        let lines = content.lines().count();
115        let branches = content.matches("if ").count()
116            + content.matches("else").count()
117            + content.matches("match ").count()
118            + content.matches("for ").count()
119            + content.matches("while ").count()
120            + content.matches("case ").count();
121        (lines + branches * 2) as u32
122    }
123
124    /// Extract semantic tags from content
125    pub fn extract_tags(content: &str, chunk_type: &ChunkType) -> Vec<String> {
126        let mut tags = Vec::new();
127        let lower = content.to_lowercase();
128
129        // Async indicators
130        if lower.contains("async ") || lower.contains("await ") || lower.contains(".then(") {
131            tags.push("async".to_string());
132        }
133
134        // Database indicators
135        if lower.contains("query") || lower.contains("sql") || lower.contains("database")
136            || lower.contains("insert") || lower.contains("select ") || lower.contains("table")
137        {
138            tags.push("database".to_string());
139        }
140
141        // Auth indicators
142        if lower.contains("auth") || lower.contains("login") || lower.contains("token")
143            || lower.contains("password") || lower.contains("session") || lower.contains("permission")
144        {
145            tags.push("auth".to_string());
146        }
147
148        // Error handling
149        if lower.contains("error") || lower.contains("exception") || lower.contains("try ")
150            || lower.contains("catch") || lower.contains("result<") || lower.contains("anyhow")
151        {
152            tags.push("error_handling".to_string());
153        }
154
155        // API/HTTP
156        if lower.contains("endpoint") || lower.contains("route") || lower.contains("request")
157            || lower.contains("response") || lower.contains("http") || lower.contains("api")
158        {
159            tags.push("api".to_string());
160        }
161
162        // Testing
163        if lower.contains("#[test]") || lower.contains("#[cfg(test)]")
164            || lower.contains("assert") || lower.contains("mock") || lower.contains("fixture")
165        {
166            tags.push("test".to_string());
167        }
168
169        // Export (JS/TS)
170        if lower.contains("export ") || lower.contains("module.exports") || lower.contains("pub ") {
171            tags.push("export".to_string());
172        }
173
174        // Chunk type tag
175        match chunk_type {
176            ChunkType::Class | ChunkType::Struct => tags.push("type_definition".to_string()),
177            ChunkType::Interface | ChunkType::Trait => tags.push("interface".to_string()),
178            ChunkType::ImportBlock => tags.push("imports".to_string()),
179            _ => {}
180        }
181
182        tags.sort();
183        tags.dedup();
184        tags
185    }
186}