Skip to main content

argyph_core/
index.rs

1use std::collections::HashMap;
2use std::io::BufRead;
3use std::process::Command;
4use std::sync::{Arc, OnceLock};
5use std::time::SystemTime;
6
7use argyph_embed::Embedder;
8use argyph_fs::ChangedPath;
9use argyph_fs::FileEntry;
10use argyph_graph::edge::Edge;
11use argyph_graph::graph::SymbolOutline;
12use argyph_graph::selector::SymbolSelector;
13use argyph_pack::{self, DefaultPacker, PackContext, PackRequest, PackResult, PackScope, Packer};
14use argyph_parse::types::Symbol;
15use argyph_parse::SymbolId;
16use argyph_store::Store;
17use camino::{Utf8Path, Utf8PathBuf};
18use regex::Regex;
19
20use crate::error::{CoreError, Result};
21
/// Optional path restrictions for a text search.
///
/// Both lists hold glob patterns that are matched against indexed file paths.
/// `SearchFilter::default()` applies no restriction at all.
#[derive(Debug, Clone, Default)]
pub struct SearchFilter {
    /// When set, a file is searched only if its path matches at least one glob.
    pub paths_glob: Option<Vec<String>>,
    /// When set, a file is skipped if its path matches any of these globs.
    pub exclude_glob: Option<Vec<String>>,
}
26
27pub struct SearchHit {
28    pub file: Utf8PathBuf,
29    pub line: u64,
30    pub column: u64,
31    pub match_text: String,
32}
33
34pub struct SearchResult {
35    pub hits: Vec<SearchHit>,
36    pub truncated: bool,
37}
38
/// Number of indexed files detected for one language.
#[derive(Debug, Clone)]
pub struct LanguageSummary {
    /// Language name as reported by the index.
    pub name: String,
    /// How many indexed files carry that language.
    pub files: u64,
}

/// Snapshot of the repository's git state.
#[derive(Debug, Clone)]
pub struct GitInfo {
    /// Output of `git rev-parse --abbrev-ref HEAD`.
    pub branch: String,
    /// Output of `git rev-parse --short HEAD`.
    pub head_short: String,
    /// Whether git reported local modifications in the working tree.
    pub dirty: bool,
}

/// High-level summary of a repository produced by `Index::overview`.
#[derive(Debug, Clone)]
pub struct RepoOverview {
    /// Per-language file counts, largest first.
    pub languages: Vec<LanguageSummary>,
    /// Well-known entry-point paths that are present in the index.
    pub entry_points: Vec<String>,
    /// First lines of the README, or an empty string when none was readable.
    pub readme_excerpt: String,
    /// Indented textual rendering of the indexed directory structure.
    pub tree: String,
    /// Git details, or `None` when they could not be determined.
    pub git: Option<GitInfo>,
}
57
/// One chunk returned by a semantic (hybrid) search.
#[derive(Debug, Clone)]
pub struct SemanticHit {
    /// Identifier of the matching chunk, as reported by the store.
    pub chunk_id: String,
    /// Text content of the chunk.
    pub chunk_text: String,
    /// Path of the file the chunk belongs to.
    pub file: String,
    /// Byte range of the chunk, forwarded unchanged from the store.
    pub byte_range: (u32, u32),
    /// Line range of the chunk, forwarded unchanged from the store.
    pub line_range: (u32, u32),
    /// Relevance score assigned by the store.
    pub score: f32,
    /// Lower-cased debug name of the store's hit-source value.
    pub source: String,
}

/// Outcome of a semantic search.
#[derive(Debug, Clone)]
pub struct SemanticResult {
    /// Ranked hits as returned by the store.
    pub hits: Vec<SemanticHit>,
    /// `total_embedded` counter forwarded from the store's search result.
    pub total_embedded: usize,
    /// `total_chunks` counter forwarded from the store's search result.
    pub total_chunks: usize,
}
73
/// The single domain facade that UI layers consume.
///
/// All queries go through `Index`; no caller outside `argyph-core` touches the
/// underlying [`Store`] directly.
pub struct Index {
    /// Backing store that the query methods of this facade delegate to.
    store: Arc<dyn Store>,
    /// Shared cell holding the embedder; semantic search returns an error
    /// while the cell is still empty.
    embedder: Arc<OnceLock<Arc<dyn Embedder>>>,
}
82
83impl Index {
84    pub(crate) fn new(store: Arc<dyn Store>, embedder: Arc<OnceLock<Arc<dyn Embedder>>>) -> Self {
85        Self { store, embedder }
86    }
87
88    pub fn protocol_version() -> &'static str {
89        "0.1.0"
90    }
91
92    pub async fn get_file(&self, path: &Utf8Path) -> Result<Option<FileEntry>> {
93        Ok(self.store.get_file(path).await?)
94    }
95
96    pub async fn list_files(&self) -> Result<Vec<FileEntry>> {
97        Ok(self.store.list_files().await?)
98    }
99
100    pub async fn status(&self) -> Result<IndexStatus> {
101        let files = self.store.list_files().await?;
102        Ok(IndexStatus {
103            protocol_version: Self::protocol_version().to_string(),
104            file_count: files.len() as u64,
105            snapshot_at: SystemTime::now(),
106        })
107    }
108
109    pub async fn search_text(
110        &self,
111        root: &Utf8Path,
112        pattern: &str,
113        regex: bool,
114        case_sensitive: bool,
115        max_results: u64,
116        filter: Option<SearchFilter>,
117    ) -> Result<SearchResult> {
118        let max = max_results.clamp(1, 1000);
119
120        let re = build_regex(pattern, regex, case_sensitive)?;
121
122        let files = self.store.list_files().await?;
123        let files: Vec<_> = files
124            .into_iter()
125            .filter(|f| match &filter {
126                Some(filt) => path_matches_filter(f.path.as_str(), filt),
127                None => true,
128            })
129            .collect();
130
131        let mut hits = Vec::new();
132        'outer: for entry in &files {
133            let file_path = root.join(entry.path.as_str());
134            let f = match std::fs::File::open(file_path.as_str()) {
135                Ok(f) => f,
136                Err(_) => continue,
137            };
138            let reader = std::io::BufReader::new(f);
139            for (line_no, line_result) in reader.lines().enumerate() {
140                let line = match line_result {
141                    Ok(l) => l,
142                    Err(_) => continue,
143                };
144                for mat in re.find_iter(&line) {
145                    hits.push(SearchHit {
146                        file: entry.path.clone(),
147                        line: (line_no + 1) as u64,
148                        column: (mat.start() + 1) as u64,
149                        match_text: mat.as_str().to_string(),
150                    });
151                    if hits.len() >= max as usize {
152                        break 'outer;
153                    }
154                }
155            }
156        }
157
158        let total: usize = files
159            .iter()
160            .filter_map(|f| {
161                let fp = root.join(f.path.as_str());
162                std::fs::read_to_string(fp.as_str())
163                    .ok()
164                    .map(|c| re.find_iter(&c).count())
165            })
166            .sum();
167        let truncated = total > max as usize;
168
169        Ok(SearchResult { hits, truncated })
170    }
171
172    pub async fn search_semantic(
173        &self,
174        query: &str,
175        k: usize,
176        filter: Option<&argyph_store::search::SearchFilter>,
177    ) -> Result<SemanticResult> {
178        let embedder = self.embedder.get().ok_or_else(|| {
179            CoreError::Embed("no embedder configured — cannot perform semantic search".into())
180        })?;
181
182        let query_vec = embedder
183            .embed_query(query)
184            .await
185            .map_err(|e| CoreError::Embed(format!("{e}")))?;
186
187        let result = self
188            .store
189            .search_hybrid(query, &query_vec, k, filter.unwrap_or(&Default::default()))
190            .await?;
191
192        Ok(SemanticResult {
193            hits: result
194                .hits
195                .into_iter()
196                .map(|h| SemanticHit {
197                    chunk_id: h.chunk_id,
198                    chunk_text: h.chunk_text,
199                    file: h.file,
200                    byte_range: h.byte_range,
201                    line_range: h.line_range,
202                    score: h.score,
203                    source: format!("{:?}", h.source).to_lowercase(),
204                })
205                .collect(),
206            total_embedded: result.total_embedded,
207            total_chunks: result.total_chunks,
208        })
209    }
210
211    pub async fn overview(&self, root: &Utf8Path, max_tree_depth: u64) -> Result<RepoOverview> {
212        let depth = max_tree_depth.clamp(1, 6) as usize;
213        let files = self.store.list_files().await?;
214
215        let mut lang_counts: HashMap<String, u64> = HashMap::new();
216        for f in &files {
217            if let Some(lang) = &f.language {
218                *lang_counts.entry(lang.to_string()).or_default() += 1;
219            }
220        }
221        let mut languages: Vec<LanguageSummary> = lang_counts
222            .into_iter()
223            .map(|(name, count)| LanguageSummary { name, files: count })
224            .collect();
225        languages.sort_by(|a, b| b.files.cmp(&a.files));
226
227        let entry_points: Vec<String> = [
228            "src/main.rs",
229            "src/lib.rs",
230            "main.rs",
231            "lib.rs",
232            "src/index.ts",
233            "src/index.js",
234            "src/index.py",
235        ]
236        .iter()
237        .filter(|p| files.iter().any(|f| f.path.as_str() == **p))
238        .map(|s| s.to_string())
239        .collect();
240
241        let readme_excerpt = Self::read_readme(root);
242        let tree = Self::build_tree(&files, depth);
243        let git = Self::get_git_info(root);
244
245        Ok(RepoOverview {
246            languages,
247            entry_points,
248            readme_excerpt,
249            tree,
250            git,
251        })
252    }
253
254    pub async fn find_symbol(&self, name: &str, file: Option<&Utf8Path>) -> Result<Vec<Symbol>> {
255        Ok(self.store.find_symbol(name, file).await?)
256    }
257
258    pub async fn find_references(&self, sel: &SymbolSelector) -> Result<Vec<Edge>> {
259        Ok(self.store.find_references(sel).await?)
260    }
261
262    pub async fn get_callers(&self, sel: &SymbolSelector) -> Result<Vec<Edge>> {
263        Ok(self.store.get_callers(sel).await?)
264    }
265
266    pub async fn get_callees(&self, sel: &SymbolSelector) -> Result<Vec<Edge>> {
267        Ok(self.store.get_callees(sel).await?)
268    }
269
270    pub async fn get_imports(&self, file: &Utf8Path) -> Result<Vec<Edge>> {
271        Ok(self.store.get_imports(file).await?)
272    }
273
274    pub async fn get_symbol_outline(&self, file: &Utf8Path) -> Result<Vec<SymbolOutline>> {
275        Ok(self.store.get_symbol_outline(file).await?)
276    }
277
278    pub async fn reindex(&self, root: &Utf8Path, changes: &[ChangedPath]) -> Result<()> {
279        crate::tiers::incremental_reindex(root, &*self.store, changes).await
280    }
281
282    pub async fn pack(&self, root: &Utf8Path, req: &PackRequest) -> Result<PackResult> {
283        let packer = DefaultPacker::new().map_err(|e| CoreError::Io(std::io::Error::other(e)))?;
284        let ctx = IndexPackContext {
285            index: self,
286            root: root.to_owned(),
287        };
288        packer
289            .pack(req, &ctx)
290            .map_err(|e| CoreError::Io(std::io::Error::other(e)))
291    }
292
293    // ── helpers ────────────────────────────────────────────────
294
295    fn build_tree(files: &[FileEntry], depth: usize) -> String {
296        let mut paths: Vec<&str> = files.iter().map(|f| f.path.as_str()).collect();
297        paths.sort();
298        paths.truncate(500);
299        let mut out = String::new();
300        let mut prev: Vec<&str> = vec![];
301        for path in &paths {
302            let parts: Vec<&str> = path.split('/').collect();
303            let common = prev.iter().zip(&parts).filter(|(a, b)| a == b).count();
304            if common < depth {
305                for (i, part) in parts.iter().enumerate().skip(common).take(depth - common) {
306                    let indent = "  ".repeat(i);
307                    out.push_str(&format!("{indent}{part}/\n"));
308                }
309            }
310            prev = parts;
311        }
312        out
313    }
314
315    fn read_readme(root: &camino::Utf8Path) -> String {
316        for name in &["README.md", "README", "readme.md"] {
317            let path = root.join(name);
318            if let Ok(content) = std::fs::read_to_string(path.as_str()) {
319                return content.lines().take(10).collect::<Vec<_>>().join("\n");
320            }
321        }
322        String::new()
323    }
324
325    fn get_git_info(root: &camino::Utf8Path) -> Option<GitInfo> {
326        let git_dir = root.join(".git");
327        if !git_dir.exists() {
328            return None;
329        }
330        let run = |args: &[&str]| -> Option<String> {
331            Command::new("git")
332                .args(args)
333                .current_dir(root.as_str())
334                .output()
335                .ok()
336                .map(|o| String::from_utf8_lossy(&o.stdout).trim().to_string())
337        };
338        let branch = run(&["rev-parse", "--abbrev-ref", "HEAD"])?;
339        let head_short = run(&["rev-parse", "--short", "HEAD"])?;
340        let dirty = Command::new("git")
341            .args(["diff", "--quiet"])
342            .current_dir(root.as_str())
343            .status()
344            .ok()
345            .map(|s| !s.success())?;
346        Some(GitInfo {
347            branch,
348            head_short,
349            dirty,
350        })
351    }
352}
353
/// Adapter that lets the packer query an [`Index`] through the `PackContext` trait.
struct IndexPackContext<'a> {
    /// Index that answers file, symbol and edge queries.
    index: &'a Index,
    /// Directory that indexed paths are joined onto when file contents are read.
    root: Utf8PathBuf,
}
358
359impl PackContext for IndexPackContext<'_> {
360    fn list_files(&self, scope: &PackScope) -> Vec<Utf8PathBuf> {
361        let files = tokio::runtime::Handle::current()
362            .block_on(self.index.list_files())
363            .unwrap_or_default();
364        let paths: Vec<Utf8PathBuf> = files.into_iter().map(|f| f.path).collect();
365        match scope {
366            PackScope::All => paths,
367            PackScope::Paths(requested) => {
368                let requested_set: std::collections::HashSet<_> = requested.iter().collect();
369                paths
370                    .into_iter()
371                    .filter(|p| requested_set.contains(p))
372                    .collect()
373            }
374            PackScope::Symbol(name) => {
375                let indexed_set: std::collections::HashSet<_> = paths.iter().collect();
376                let syms = tokio::runtime::Handle::current()
377                    .block_on(self.index.find_symbol(name, None))
378                    .unwrap_or_default();
379                let mut file_set: std::collections::HashSet<Utf8PathBuf> =
380                    std::collections::HashSet::new();
381                for sym in &syms {
382                    file_set.insert(sym.file.clone());
383                    let selector = SymbolSelector::ById(sym.id.clone());
384                    if let Ok(callees) = tokio::runtime::Handle::current()
385                        .block_on(self.index.get_callees(&selector))
386                    {
387                        for edge in &callees {
388                            if let Some(f) = file_from_symbol_id(&edge.to) {
389                                file_set.insert(f);
390                            }
391                        }
392                    }
393                    if let Ok(refs) = tokio::runtime::Handle::current()
394                        .block_on(self.index.find_references(&selector))
395                    {
396                        for edge in &refs {
397                            if let Some(f) = file_from_symbol_id(&edge.from) {
398                                file_set.insert(f);
399                            }
400                        }
401                    }
402                }
403                file_set
404                    .into_iter()
405                    .filter(|p| indexed_set.contains(p))
406                    .collect()
407            }
408        }
409    }
410
411    fn read(&self, file: &Utf8Path) -> argyph_pack::Result<String> {
412        let full_path = self.root.join(file.as_str());
413        std::fs::read_to_string(full_path.as_str())
414            .map_err(|e| argyph_pack::PackError::Io(e.to_string()))
415    }
416
417    fn modified(&self, file: &Utf8Path) -> Option<SystemTime> {
418        tokio::runtime::Handle::current()
419            .block_on(self.index.get_file(file))
420            .ok()
421            .flatten()
422            .map(|entry| entry.modified)
423    }
424
425    fn in_edges(&self, file: &Utf8Path) -> argyph_pack::Result<usize> {
426        tokio::runtime::Handle::current()
427            .block_on(self.index.get_imports(file))
428            .map(|edges| edges.len())
429            .map_err(|e| argyph_pack::PackError::Io(e.to_string()))
430    }
431}
432
433fn build_regex(pattern: &str, regex: bool, case_sensitive: bool) -> Result<Regex> {
434    let pat = if regex {
435        pattern.to_string()
436    } else {
437        regex::escape(pattern)
438    };
439    regex::RegexBuilder::new(&pat)
440        .case_insensitive(!case_sensitive)
441        .build()
442        .map_err(|e| crate::CoreError::Io(std::io::Error::new(std::io::ErrorKind::InvalidInput, e)))
443}
444
445fn path_matches_filter(path: &str, filter: &SearchFilter) -> bool {
446    let globs_ok = filter
447        .paths_glob
448        .as_ref()
449        .is_none_or(|globs| globs.iter().any(|g| glob_match(g, path)));
450    let excludes_ok = filter
451        .exclude_glob
452        .as_ref()
453        .is_none_or(|globs| !globs.iter().any(|g| glob_match(g, path)));
454    globs_ok && excludes_ok
455}
456
457fn glob_match(glob: &str, path: &str) -> bool {
458    let cleaned = glob.trim_start_matches('!');
459    if let Ok(re) = glob_to_regex(cleaned) {
460        re.is_match(path)
461    } else {
462        path.contains(cleaned)
463    }
464}
465
466fn glob_to_regex(glob: &str) -> std::result::Result<Regex, regex::Error> {
467    let mut pattern = String::from("^");
468    let chars: Vec<char> = glob.chars().collect();
469    let mut i = 0;
470    while i < chars.len() {
471        match chars[i] {
472            '*' if i + 1 < chars.len() && chars[i + 1] == '*' => {
473                pattern.push_str(".*");
474                i += 1;
475            }
476            '*' => pattern.push_str("[^/]*"),
477            '?' => pattern.push_str("[^/]"),
478            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '{' | '}' | '[' | ']' | '\\' => {
479                pattern.push('\\');
480                pattern.push(chars[i]);
481            }
482            c => pattern.push(c),
483        }
484        i += 1;
485    }
486    pattern.push('$');
487    Regex::new(&pattern)
488}
489
/// Read-only snapshot returned by [`Index::status`].
#[derive(Debug, Clone)]
pub struct IndexStatus {
    /// Protocol version string, as returned by [`Index::protocol_version`].
    pub protocol_version: String,
    /// Number of files the store listed when the snapshot was taken.
    pub file_count: u64,
    /// Wall-clock time at which the snapshot was built.
    pub snapshot_at: SystemTime,
}
497
498fn file_from_symbol_id(id: &SymbolId) -> Option<Utf8PathBuf> {
499    let s = id.as_str();
500    let (prefix, _) = s.rsplit_once("::")?;
501    let (file, _) = prefix.rsplit_once("::")?;
502    Some(Utf8PathBuf::from(file))
503}
504
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// `**` crosses directory boundaries.
    #[test]
    fn glob_star_star_matches_subdirs() {
        let re = glob_to_regex("src/**").unwrap();
        for path in ["src/main.rs", "src/auth/mod.rs"] {
            assert!(re.is_match(path));
        }
    }

    /// A single `*` never crosses a `/`.
    #[test]
    fn glob_single_star_no_slash() {
        let re = glob_to_regex("*.rs").unwrap();
        assert_eq!(re.is_match("main.rs"), true);
        assert_eq!(re.is_match("src/main.rs"), false);
    }

    /// Literal, case-sensitive patterns match exactly as written.
    #[test]
    fn build_regex_literal() {
        let re = build_regex("fn main", false, true).unwrap();
        assert_eq!(re.is_match("fn main() {}"), true);
        assert_eq!(re.is_match("FN MAIN"), false);
    }

    /// With case sensitivity off, both spellings match.
    #[test]
    fn build_regex_case_insensitive() {
        let re = build_regex("fn", false, false).unwrap();
        for haystack in ["fn main", "FN MAIN"] {
            assert!(re.is_match(haystack));
        }
    }
}