Skip to main content

enya_analyzer/
index.rs

//! Codebase index for discovered metrics.
//!
//! Builds and maintains an in-memory index of all metric instrumentation
//! points discovered in a repository using registered scanners.
5
6use std::path::{Path, PathBuf};
7use std::sync::Arc;
8use std::sync::atomic::{AtomicUsize, Ordering};
9
10use parking_lot::RwLock;
11use rustc_hash::FxHashSet;
12use walkdir::{DirEntry, WalkDir};
13
14use crate::parser::ParseError;
15use crate::scanner::{AlertRule, MetricInstrumentation, ScannerRegistry, YamlAlertScanner};
16
/// Names of directories that file discovery skips.
///
/// Matching is by exact, case-sensitive comparison against a single path
/// component; no globbing or prefix matching is performed.
const EXCLUDED_DIRS: [&str; 8] = [
    "target",
    ".git",
    "vendor",
    "node_modules",
    "dist",
    "build",
    "public",
    "assets",
];
28
29/// Discover files in a directory that match the given extensions.
30///
31/// Walks the directory tree, filtering for files with matching extensions
32/// and excluding common build/vendor directories.
33fn discover_files<'a>(
34    root: &Path,
35    extensions: &'a FxHashSet<&str>,
36) -> impl Iterator<Item = DirEntry> + 'a {
37    let root = root.to_path_buf();
38    WalkDir::new(&root)
39        .follow_links(true)
40        .into_iter()
41        .filter_map(Result::ok)
42        .filter(move |entry| {
43            // Check extension matches
44            let has_matching_ext = entry
45                .path()
46                .extension()
47                .and_then(|ext| ext.to_str())
48                .is_some_and(|ext| extensions.contains(ext));
49
50            // Check not in excluded directory
51            let in_excluded_dir = entry.path().components().any(|c| {
52                c.as_os_str()
53                    .to_str()
54                    .is_some_and(|s| EXCLUDED_DIRS.contains(&s))
55            });
56
57            // Skip minified files (*.min.js, *.min.css, etc.)
58            let is_minified = entry
59                .path()
60                .file_name()
61                .and_then(|n| n.to_str())
62                .is_some_and(|name| name.contains(".min."));
63
64            has_matching_ext && !in_excluded_dir && !is_minified
65        })
66}
67
68/// Progress tracking for indexing operations.
69#[derive(Debug, Clone)]
70pub struct IndexProgress {
71    /// Current file being processed (1-indexed)
72    pub current: Arc<AtomicUsize>,
73    /// Total number of files to process
74    pub total: Arc<AtomicUsize>,
75    /// Name of the current file being indexed
76    current_file: Arc<RwLock<Option<String>>>,
77}
78
79impl IndexProgress {
80    /// Create a new progress tracker.
81    #[must_use]
82    pub fn new() -> Self {
83        Self {
84            current: Arc::new(AtomicUsize::new(0)),
85            total: Arc::new(AtomicUsize::new(0)),
86            current_file: Arc::new(RwLock::new(None)),
87        }
88    }
89
90    /// Get the current progress values.
91    #[must_use]
92    pub fn get(&self) -> (usize, usize) {
93        (
94            self.current.load(Ordering::Relaxed),
95            self.total.load(Ordering::Relaxed),
96        )
97    }
98
99    /// Set the current file being indexed.
100    pub fn set_current_file(&self, file_name: Option<String>) {
101        *self.current_file.write() = file_name;
102    }
103
104    /// Get the current file name being indexed.
105    #[must_use]
106    pub fn current_file(&self) -> Option<String> {
107        self.current_file.read().clone()
108    }
109}
110
111impl Default for IndexProgress {
112    fn default() -> Self {
113        Self::new()
114    }
115}
116
117/// An index of all discovered metric instrumentation and alert rules in a codebase.
118#[derive(Debug, Clone)]
119pub struct CodebaseIndex {
120    /// The git URL of the repository.
121    pub repo_url: String,
122    /// The local path to the repository.
123    pub repo_path: PathBuf,
124    /// All discovered metric instrumentation points.
125    pub metrics: Vec<MetricInstrumentation>,
126    /// All discovered Prometheus alert rules.
127    pub alerts: Vec<AlertRule>,
128    /// Unix timestamp when this index was built.
129    pub last_updated: i64,
130}
131
132impl CodebaseIndex {
133    /// Returns the number of unique metric names.
134    #[must_use]
135    pub fn unique_metric_count(&self) -> usize {
136        self.metrics
137            .iter()
138            .map(|m| &m.name)
139            .collect::<FxHashSet<_>>()
140            .len()
141    }
142
143    /// Returns the number of files containing metrics.
144    #[must_use]
145    pub fn files_with_metrics(&self) -> usize {
146        self.metrics
147            .iter()
148            .map(|m| &m.file)
149            .collect::<FxHashSet<_>>()
150            .len()
151    }
152
153    /// Searches for metrics matching the given query.
154    #[must_use]
155    pub fn search(&self, query: &str) -> Vec<&MetricInstrumentation> {
156        let query_lower = query.to_lowercase();
157        self.metrics
158            .iter()
159            .filter(|m| m.name.to_lowercase().contains(&query_lower))
160            .collect()
161    }
162
163    /// Finds all instrumentation points for a specific metric name.
164    ///
165    /// First tries exact matching. If no matches found, falls back to suffix
166    /// matching to handle runtime prefixes (e.g., `app_http_requests_total`
167    /// matches `http_requests_total` in source code).
168    #[must_use]
169    pub fn find_by_name(&self, name: &str) -> Vec<&MetricInstrumentation> {
170        // Try exact match first
171        let exact: Vec<_> = self.metrics.iter().filter(|m| m.name == name).collect();
172        if !exact.is_empty() {
173            return exact;
174        }
175
176        // Fallback: suffix matching for prefixed metric names
177        // e.g., "app_http_requests_total".ends_with("http_requests_total")
178        let suffix_matches: Vec<_> = self
179            .metrics
180            .iter()
181            .filter(|m| name.ends_with(&m.name))
182            .collect();
183
184        // If multiple suffix matches, prefer the longest (most specific) match
185        if suffix_matches.len() > 1 {
186            let max_len = suffix_matches
187                .iter()
188                .map(|m| m.name.len())
189                .max()
190                .unwrap_or(0);
191            suffix_matches
192                .into_iter()
193                .filter(|m| m.name.len() == max_len)
194                .collect()
195        } else {
196            suffix_matches
197        }
198    }
199
200    /// Returns the number of alert rules.
201    #[must_use]
202    pub fn alert_count(&self) -> usize {
203        self.alerts.len()
204    }
205
206    /// Finds all alert rules that reference a specific metric name.
207    #[must_use]
208    pub fn find_alerts_by_metric(&self, metric_name: &str) -> Vec<&AlertRule> {
209        self.alerts
210            .iter()
211            .filter(|a| a.metric_name.as_deref() == Some(metric_name))
212            .collect()
213    }
214
215    /// Finds an alert rule by its name.
216    #[must_use]
217    pub fn find_alert_by_name(&self, alert_name: &str) -> Option<&AlertRule> {
218        self.alerts.iter().find(|a| a.name == alert_name)
219    }
220
221    /// Searches for alert rules matching the given query.
222    #[must_use]
223    pub fn search_alerts(&self, query: &str) -> Vec<&AlertRule> {
224        let query_lower = query.to_lowercase();
225        self.alerts
226            .iter()
227            .filter(|a| {
228                a.name.to_lowercase().contains(&query_lower)
229                    || a.metric_name
230                        .as_ref()
231                        .is_some_and(|m| m.to_lowercase().contains(&query_lower))
232            })
233            .collect()
234    }
235}
236
237/// Builds a codebase index by scanning all supported source files.
238///
239/// Uses the provided [`ScannerRegistry`] to determine which files to scan
240/// and which scanner to use for each file type.
241///
242/// Updates the provided `IndexProgress` atomics as files are processed,
243/// allowing the UI to show progress like "Indexing [5/42]...".
244///
245/// # Errors
246///
247/// Returns an error if scanning fails.
248pub fn build_index_with_progress(
249    repo_url: &str,
250    repo_path: &Path,
251    progress: &IndexProgress,
252    registry: &ScannerRegistry,
253) -> Result<CodebaseIndex, ParseError> {
254    // Get all supported extensions from registered scanners
255    let extensions: FxHashSet<&str> = registry.all_extensions().into_iter().collect();
256
257    // First pass: collect all scannable files
258    let source_files: Vec<_> = discover_files(repo_path, &extensions).collect();
259
260    // Set total count
261    progress.total.store(source_files.len(), Ordering::SeqCst);
262
263    let mut all_metrics = Vec::new();
264
265    // Second pass: scan files with progress updates
266    for (i, entry) in source_files.iter().enumerate() {
267        // Update current progress (1-indexed for display)
268        progress.current.store(i + 1, Ordering::SeqCst);
269
270        let path = entry.path();
271
272        // Update the current file name for status display
273        if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
274            progress.set_current_file(Some(file_name.to_string()));
275        }
276
277        // Find the appropriate scanner for this file
278        let Some(scanner) = registry.scanner_for(path) else {
279            continue;
280        };
281
282        // Scan the file for metrics
283        match scanner.scan_file(path) {
284            Ok(metrics) => {
285                // Convert absolute paths to relative paths from repo root
286                for mut metric in metrics {
287                    if let Ok(relative) = metric.file.strip_prefix(repo_path) {
288                        metric.file = relative.to_path_buf();
289                    }
290                    all_metrics.push(metric);
291                }
292            }
293            Err(e) => {
294                // Log but don't fail on individual file errors
295                log::warn!("Failed to scan {}: {}", path.display(), e);
296            }
297        }
298    }
299
300    // Sort by file path, then line number for consistent ordering
301    all_metrics.sort_by(|a, b| (&a.file, a.line).cmp(&(&b.file, b.line)));
302
303    // Scan for alert rules in YAML files
304    let all_alerts = scan_yaml_alerts(repo_path);
305
306    log::info!(
307        "Indexed {} metrics, {} alerts",
308        all_metrics.len(),
309        all_alerts.len()
310    );
311
312    Ok(CodebaseIndex {
313        repo_url: repo_url.to_string(),
314        repo_path: repo_path.to_path_buf(),
315        metrics: all_metrics,
316        alerts: all_alerts,
317        last_updated: crate::now_unix_secs(),
318    })
319}
320
321/// Scan YAML files for Prometheus alert rules.
322fn scan_yaml_alerts(repo_path: &Path) -> Vec<AlertRule> {
323    let mut alert_scanner = match YamlAlertScanner::new() {
324        Ok(scanner) => scanner,
325        Err(e) => {
326            log::warn!("Failed to initialize YAML alert scanner: {e}");
327            return Vec::new();
328        }
329    };
330    let yaml_extensions: FxHashSet<&str> = ["yaml", "yml"].into_iter().collect();
331
332    let mut all_alerts = Vec::new();
333
334    for entry in discover_files(repo_path, &yaml_extensions) {
335        let path = entry.path();
336        match alert_scanner.scan_file(path) {
337            Ok(alerts) => {
338                // Convert absolute paths to relative paths from repo root
339                for mut alert in alerts {
340                    if let Ok(relative) = alert.file.strip_prefix(repo_path) {
341                        alert.file = relative.to_path_buf();
342                    }
343                    all_alerts.push(alert);
344                }
345            }
346            Err(e) => {
347                // Log but don't fail on individual file errors
348                log::debug!("Failed to scan YAML file {}: {}", path.display(), e);
349            }
350        }
351    }
352
353    // Sort alerts by file path, then line number
354    all_alerts.sort_by(|a, b| (&a.file, a.line).cmp(&(&b.file, b.line)));
355
356    all_alerts
357}
358
#[cfg(test)]
mod tests {
    use super::*;
    use crate::scanner::MetricKind;

    /// Builds a counter metric with the given name and location and no labels.
    fn make_test_metric(name: &str, file: &str, line: usize) -> MetricInstrumentation {
        MetricInstrumentation {
            kind: MetricKind::Counter,
            name: name.to_string(),
            labels: vec![],
            file: PathBuf::from(file),
            line,
            column: 0,
            function_name: None,
            impl_type: None,
        }
    }

    /// Builds an alert rule located at `alerts.yaml:1` with a placeholder expression.
    fn make_test_alert(name: &str, metric_name: Option<&str>) -> AlertRule {
        AlertRule {
            name: name.to_string(),
            expr: "test_expr".to_string(),
            metric_name: metric_name.map(String::from),
            severity: None,
            message: None,
            runbook_url: None,
            file: PathBuf::from("alerts.yaml"),
            line: 1,
            column: 0,
        }
    }

    /// Wraps the given metrics and alerts in an index with fixed repository metadata.
    fn index_with(metrics: Vec<MetricInstrumentation>, alerts: Vec<AlertRule>) -> CodebaseIndex {
        CodebaseIndex {
            repo_url: "test".to_string(),
            repo_path: PathBuf::from("/test"),
            metrics,
            alerts,
            last_updated: 0,
        }
    }

    #[test]
    fn test_unique_metric_count() {
        let index = index_with(
            vec![
                make_test_metric("http.requests", "a.rs", 1),
                // Same name as above, different file.
                make_test_metric("http.requests", "b.rs", 1),
                make_test_metric("db.queries", "c.rs", 1),
            ],
            vec![],
        );

        assert_eq!(index.unique_metric_count(), 2);
    }

    #[test]
    fn test_files_with_metrics() {
        let index = index_with(
            vec![
                make_test_metric("metric1", "a.rs", 1),
                // Same file as above.
                make_test_metric("metric2", "a.rs", 2),
                make_test_metric("metric3", "b.rs", 1),
            ],
            vec![],
        );

        assert_eq!(index.files_with_metrics(), 2);
    }

    #[test]
    fn test_search() {
        let index = index_with(
            vec![
                make_test_metric("http.requests", "a.rs", 1),
                make_test_metric("http.errors", "a.rs", 2),
                make_test_metric("db.queries", "b.rs", 1),
            ],
            vec![],
        );

        assert_eq!(index.search("http").len(), 2);
        // Matching ignores case.
        assert_eq!(index.search("HTTP").len(), 2);
        assert_eq!(index.search("db").len(), 1);
    }

    #[test]
    fn test_find_by_name() {
        let index = index_with(
            vec![
                make_test_metric("http.requests", "a.rs", 1),
                make_test_metric("http.requests", "b.rs", 5),
                make_test_metric("other.metric", "c.rs", 1),
            ],
            vec![],
        );

        assert_eq!(index.find_by_name("http.requests").len(), 2);
        assert_eq!(index.find_by_name("nonexistent").len(), 0);
    }

    #[test]
    fn test_find_by_name_suffix_matching() {
        let index = index_with(
            vec![
                make_test_metric("grpc_requests_total", "a.rs", 1),
                make_test_metric("http_requests", "b.rs", 5),
            ],
            vec![],
        );

        // An exact name still resolves directly.
        let exact = index.find_by_name("grpc_requests_total");
        assert_eq!(exact.len(), 1);
        assert_eq!(exact[0].name, "grpc_requests_total");

        // myapp_grpc_requests_total -> grpc_requests_total
        let prefixed_grpc = index.find_by_name("myapp_grpc_requests_total");
        assert_eq!(prefixed_grpc.len(), 1);
        assert_eq!(prefixed_grpc[0].name, "grpc_requests_total");

        // The same fallback applies to the other indexed metric.
        let prefixed_http = index.find_by_name("myapp_http_requests");
        assert_eq!(prefixed_http.len(), 1);
        assert_eq!(prefixed_http[0].name, "http_requests");

        // Nothing indexed is a suffix of this name.
        let unknown = index.find_by_name("myapp_unknown_metric");
        assert_eq!(unknown.len(), 0);
    }

    #[test]
    fn test_find_by_name_prefers_longest_suffix() {
        let index = index_with(
            vec![
                make_test_metric("requests_total", "a.rs", 1),
                make_test_metric("grpc_requests_total", "b.rs", 5),
            ],
            vec![],
        );

        // Both names are suffixes; the longer (more specific) one must win.
        let results = index.find_by_name("myapp_grpc_requests_total");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].name, "grpc_requests_total");
    }

    #[test]
    fn test_find_alerts_by_metric() {
        let index = index_with(
            vec![],
            vec![
                make_test_alert("HighErrorRate", Some("errors_total")),
                make_test_alert("HighLatency", Some("latency_seconds")),
                make_test_alert("AnotherErrorAlert", Some("errors_total")),
            ],
        );

        let matching = index.find_alerts_by_metric("errors_total");
        assert_eq!(matching.len(), 2);
        assert_eq!(matching[0].name, "HighErrorRate");
        assert_eq!(matching[1].name, "AnotherErrorAlert");

        let missing = index.find_alerts_by_metric("nonexistent");
        assert!(missing.is_empty());
    }

    #[test]
    fn test_search_alerts() {
        let index = index_with(
            vec![],
            vec![
                make_test_alert("HighErrorRate", Some("errors_total")),
                make_test_alert("HighLatency", Some("latency_seconds")),
            ],
        );

        // Query matched against the alert name.
        assert_eq!(index.search_alerts("Error").len(), 1);

        // Query matched against the metric name.
        assert_eq!(index.search_alerts("latency").len(), 1);

        // Matching ignores case.
        assert_eq!(index.search_alerts("HIGH").len(), 2);
    }
}