scribe-core 0.5.1

Core types and utilities for the Scribe code analysis library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
//! Core traits for extensibility and plugin architecture.
//!
//! Defines the essential traits that enable customization and extension
//! of Scribe's analysis pipeline, scoring system, and output formatting.

use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::Path;

use crate::config::Config;
use crate::error::Result;
use crate::file::FileInfo;
use crate::types::{AnalysisResult, CentralityScores, RepositoryInfo, ScoreComponents};

/// Extension point for per-file analysis.
///
/// Implementors must be `Send + Sync`. Two async operations are required
/// ([`analyze_file`](FileAnalyzer::analyze_file) and
/// [`analyze_content`](FileAnalyzer::analyze_content)); batch analysis has a
/// sequential default that implementors may override.
#[async_trait]
pub trait FileAnalyzer: Send + Sync {
    /// Analyze the file at `file_path` under `config` and return its [`FileInfo`].
    async fn analyze_file(&self, file_path: &Path, config: &Config) -> Result<FileInfo>;

    /// Load and analyze file content, updating `file_info` in place.
    async fn analyze_content(&self, file_info: &mut FileInfo, config: &Config) -> Result<()>;

    /// Analyze several files and return their results in input order.
    ///
    /// The default implementation awaits `analyze_file` for each path one
    /// after another (no concurrency) and returns the first error it meets,
    /// discarding any results gathered before it.
    async fn analyze_batch(&self, files: Vec<&Path>, config: &Config) -> Result<Vec<FileInfo>> {
        let mut analyzed = Vec::with_capacity(files.len());
        for path in files {
            let info = self.analyze_file(path, config).await?;
            analyzed.push(info);
        }
        Ok(analyzed)
    }

    /// Analyzer name (documented upstream as being used for caching).
    fn name(&self) -> &'static str;

    /// Analyzer version (documented upstream as being used for cache invalidation).
    fn version(&self) -> &'static str;
}

/// Extension point for heuristic file scoring.
///
/// Implementors must be `Send + Sync`. Only `score_file`, `name` and
/// `score_components` are required; the remaining methods have defaults.
pub trait HeuristicScorer: Send + Sync {
    /// Compute the score components for one file within the given repository.
    fn score_file(
        &self,
        file_info: &FileInfo,
        repo_info: &RepositoryInfo,
    ) -> Result<ScoreComponents>;

    /// Score several files, returning one entry per input in the same order.
    ///
    /// The default implementation calls `score_file` for each file in turn
    /// and stops at the first error, which is returned as-is. Implementors
    /// can override this to exploit cross-file information.
    fn score_batch(
        &self,
        files: &[&FileInfo],
        repo_info: &RepositoryInfo,
    ) -> Result<Vec<ScoreComponents>> {
        let mut scores = Vec::with_capacity(files.len());
        for &file in files {
            let components = self.score_file(file, repo_info)?;
            scores.push(components);
        }
        Ok(scores)
    }

    /// Scorer name, for identification.
    fn name(&self) -> &'static str;

    /// Names of the score components this scorer produces.
    fn score_components(&self) -> Vec<&'static str>;

    /// Whether the scorer supports advanced (V2) features; `false` unless overridden.
    fn supports_advanced_features(&self) -> bool {
        false
    }
}

/// Trait for repository analysis implementations
///
/// Implementors must be `Send + Sync`. Both async methods produce a
/// [`RepositoryInfo`]; `can_analyze` and `priority` are synchronous.
#[async_trait]
pub trait RepositoryAnalyzer: Send + Sync {
    /// Analyze repository structure and metadata
    ///
    /// Takes the repository root and the active configuration and returns
    /// the resulting `RepositoryInfo`, or an error.
    async fn analyze_repository(&self, root_path: &Path, config: &Config)
        -> Result<RepositoryInfo>;

    /// Get repository statistics
    ///
    /// Receives the repository root together with already-collected file
    /// records and returns a `RepositoryInfo`, or an error.
    async fn get_statistics(&self, root_path: &Path, files: &[FileInfo]) -> Result<RepositoryInfo>;

    /// Check if this analyzer can handle the given repository
    fn can_analyze(&self, root_path: &Path) -> bool;

    /// Get analyzer priority (higher = preferred)
    ///
    /// Defaults to `0`, the lowest possible `u8` value.
    // NOTE(review): how ties between equal priorities are resolved is decided
    // by whoever selects the analyzer, not by this trait.
    fn priority(&self) -> u8 {
        0
    }
}

/// Trait for git integration implementations
///
/// Implementors must be `Send + Sync`. Every operation is async and
/// fallible (returns the crate's `Result`).
#[async_trait]
pub trait GitIntegration: Send + Sync {
    /// Check if path is in a git repository
    async fn is_git_repository(&self, path: &Path) -> Result<bool>;

    // NOTE(review): the signature does not guarantee one status per input
    // path or matching order; presumably that is the intent. Confirm with
    // implementations before relying on it.
    /// Get git status for files
    async fn get_file_status(&self, files: &[&Path]) -> Result<Vec<crate::file::GitStatus>>;

    /// Get repository information
    ///
    /// Returns a [`GitRepositoryInfo`] for the repository at `root_path`.
    async fn get_repo_info(&self, root_path: &Path) -> Result<GitRepositoryInfo>;

    // NOTE(review): `depth` presumably bounds how much history is inspected
    // (e.g. a commit count); its unit is not established here. Confirm.
    /// Analyze file churn (commit history)
    async fn analyze_churn(
        &self,
        root_path: &Path,
        depth: usize,
    ) -> Result<Vec<crate::types::ChurnInfo>>;

    /// Check if file should be ignored by git
    ///
    /// Takes the file in question and the repository root it belongs to.
    async fn should_ignore(&self, file_path: &Path, root_path: &Path) -> Result<bool>;
}

/// Git repository information
///
/// Plain data record returned by [`GitIntegration::get_repo_info`]. It is
/// cloneable and (de)serializable via serde. The three `Option` fields are
/// `None` when the corresponding value is not available.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GitRepositoryInfo {
    /// Repository root path
    pub root: std::path::PathBuf,
    /// Current branch
    pub branch: Option<String>,
    /// Remote URL
    pub remote_url: Option<String>,
    /// Last commit hash
    pub last_commit: Option<String>,
    /// Whether repository has uncommitted changes
    pub has_changes: bool,
}

/// Trait for centrality computation implementations
///
/// Implementors must be `Send + Sync`. The intended flow, per the method
/// signatures, is to build a [`DependencyGraph`] from files and then pass
/// that graph to `compute_centrality`.
#[async_trait]
pub trait CentralityComputer: Send + Sync {
    /// Build dependency graph from files
    async fn build_dependency_graph(&self, files: &[&FileInfo]) -> Result<DependencyGraph>;

    /// Compute PageRank centrality scores
    ///
    /// Takes a previously built graph by reference and returns
    /// `CentralityScores`, or an error.
    async fn compute_centrality(&self, graph: &DependencyGraph) -> Result<CentralityScores>;

    /// Get supported languages for dependency analysis
    fn supported_languages(&self) -> Vec<crate::file::Language>;

    /// Check if file can be analyzed for dependencies
    // NOTE(review): presumably consistent with `supported_languages`, but
    // nothing in this trait enforces that relationship.
    fn can_analyze_file(&self, file_info: &FileInfo) -> bool;
}

/// Dependency graph representation
///
/// A directed graph stored as four parallel vectors indexed by node id.
/// `add_node` pushes one entry onto each vector, so they have equal length
/// as long as the graph is only modified through its methods. All fields
/// are public, so that invariant is not enforced by the type.
///
/// Unlike [`DependencyNodeMetadata`], this type derives only `Debug` and
/// `Clone` (no serde support).
// NOTE(review): whether an edge `source -> target` means "source depends on
// target" or the reverse is not established by this file; the field docs
// below describe direction only in terms of `add_edge(source, target)`.
#[derive(Debug, Clone)]
pub struct DependencyGraph {
    /// Node IDs (file paths)
    ///
    /// The index of a path in this vector is that node's id.
    pub nodes: Vec<String>,

    /// Adjacency list (node_id -> \[dependent_node_ids\])
    ///
    /// `edges[source]` holds every `target` passed to
    /// `add_edge(source, target)`.
    pub edges: Vec<Vec<usize>>,

    /// Reverse adjacency list (node_id -> \[dependency_node_ids\])
    ///
    /// `reverse_edges[target]` holds every `source` passed to
    /// `add_edge(source, target)`.
    pub reverse_edges: Vec<Vec<usize>>,

    /// Node metadata
    ///
    /// `node_metadata[id]` is the metadata supplied to `add_node` for that id.
    pub node_metadata: Vec<DependencyNodeMetadata>,
}

/// Metadata for dependency graph nodes
///
/// One record is stored per node in `DependencyGraph::node_metadata`.
/// Cloneable and (de)serializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DependencyNodeMetadata {
    /// File path
    // NOTE(review): `DependencyGraph::add_node` takes a separate `path`
    // argument for the node id list; nothing forces it to equal this field.
    pub path: String,
    /// Programming language
    pub language: crate::file::Language,
    /// File size
    // NOTE(review): presumably in bytes; the unit is not stated here.
    pub size: u64,
    /// Whether this is a test file
    pub is_test: bool,
    /// Whether this is an entrypoint
    pub is_entrypoint: bool,
}

impl DependencyGraph {
    /// Build a graph with no nodes and no edges.
    pub fn new() -> Self {
        Self {
            nodes: vec![],
            edges: vec![],
            reverse_edges: vec![],
            node_metadata: vec![],
        }
    }

    /// Append a node and return its id.
    ///
    /// The id is the node's index in every per-node vector; each of those
    /// vectors grows by exactly one entry, with empty adjacency lists.
    pub fn add_node(&mut self, path: String, metadata: DependencyNodeMetadata) -> usize {
        let id = self.nodes.len();
        self.node_metadata.push(metadata);
        self.reverse_edges.push(Vec::new());
        self.edges.push(Vec::new());
        self.nodes.push(path);
        id
    }

    /// Record a directed edge from `source` to `target`.
    ///
    /// If either id is out of range the call is silently ignored. Duplicate
    /// edges are not filtered: calling this twice stores the edge twice.
    pub fn add_edge(&mut self, source: usize, target: usize) {
        let source_known = source < self.edges.len();
        let target_known = target < self.reverse_edges.len();
        if !(source_known && target_known) {
            return;
        }

        self.edges[source].push(target);
        self.reverse_edges[target].push(source);
    }

    /// Summarize the graph's size, degree distribution and density.
    ///
    /// Averages and density are `0.0` when they would otherwise divide by
    /// zero. Density is the edge count divided by `n * (n - 1)`, the number
    /// of ordered node pairs. The strongly-connected-component count is not
    /// computed yet and is always reported as `0`.
    pub fn stats(&self) -> crate::types::GraphStats {
        let node_count = self.nodes.len();

        // Degree sequences, re-creatable so each can be both summed and maxed.
        let out_degrees = || self.edges.iter().map(Vec::len);
        let in_degrees = || self.reverse_edges.iter().map(Vec::len);

        // The out-degree sum is also the total number of stored edges.
        let edge_count: usize = out_degrees().sum();
        let in_total: usize = in_degrees().sum();

        let per_node = |total: usize| -> f64 {
            if node_count > 0 {
                total as f64 / node_count as f64
            } else {
                0.0
            }
        };

        let ordered_pairs = if node_count > 1 {
            node_count * (node_count - 1)
        } else {
            0
        };
        let density = if ordered_pairs > 0 {
            edge_count as f64 / ordered_pairs as f64
        } else {
            0.0
        };

        crate::types::GraphStats {
            total_nodes: node_count,
            total_edges: edge_count,
            in_degree_avg: per_node(in_total),
            in_degree_max: in_degrees().max().unwrap_or(0),
            out_degree_avg: per_node(edge_count),
            out_degree_max: out_degrees().max().unwrap_or(0),
            strongly_connected_components: 0, // TODO: Implement SCC computation
            graph_density: density,
        }
    }
}

impl Default for DependencyGraph {
    fn default() -> Self {
        Self::new()
    }
}

/// Trait for pattern matching implementations (glob, regex, etc.)
///
/// Implementors must be `Send + Sync`. All methods are synchronous and
/// infallible.
pub trait PatternMatcher: Send + Sync {
    /// Check if a path matches the pattern
    fn matches(&self, path: &Path) -> bool;

    /// Get pattern string for debugging
    ///
    /// The returned slice borrows from `self`.
    fn pattern(&self) -> &str;

    /// Check if pattern is case sensitive
    ///
    /// Defaults to `true`; case-insensitive matchers must override it.
    fn is_case_sensitive(&self) -> bool {
        true
    }
}

/// Trait for output formatting implementations
///
/// Implementors must be `Send + Sync`. Both formatting methods return the
/// complete output as an owned `String`, or an error.
pub trait OutputFormatter: Send + Sync {
    /// Format analysis results
    fn format_results(&self, results: &[AnalysisResult], config: &Config) -> Result<String>;

    /// Format repository information
    fn format_repository_info(&self, repo_info: &RepositoryInfo, config: &Config)
        -> Result<String>;

    /// Get supported output format name
    fn format_name(&self) -> &'static str;

    /// Get file extension for output format
    // NOTE(review): whether the extension includes a leading dot is not
    // specified here; confirm against existing formatters.
    fn file_extension(&self) -> &'static str;

    /// Check if format supports streaming output
    ///
    /// Defaults to `false`. This trait itself exposes no streaming method,
    /// so the flag is only advisory for callers.
    fn supports_streaming(&self) -> bool {
        false
    }
}

/// Trait for caching implementations
///
/// Implementors must be `Send + Sync`. Values are typed by the caller:
/// `get` requires `T` to be deserializable and `put` requires it to be
/// serializable, so the storage format is left to the implementation.
///
/// Because `get` and `put` are generic over `T`, this trait cannot be used
/// as a trait object (`dyn CacheStorage`); use it through generics.
#[async_trait]
pub trait CacheStorage: Send + Sync {
    /// Get cached result
    ///
    /// Returns `Ok(None)` when there is no value to return for `key`, and
    /// `Err` for failures.
    async fn get<T>(&self, key: &str) -> Result<Option<T>>
    where
        T: for<'de> Deserialize<'de> + Send;

    /// Store result in cache
    // NOTE(review): `ttl = None` presumably means "no expiry"; the signature
    // alone does not establish that. Confirm with implementations.
    async fn put<T>(&self, key: &str, value: &T, ttl: Option<std::time::Duration>) -> Result<()>
    where
        T: Serialize + Send + Sync;

    /// Remove item from cache
    async fn remove(&self, key: &str) -> Result<()>;

    /// Clear entire cache
    async fn clear(&self) -> Result<()>;

    /// Get cache statistics
    ///
    /// Returns a [`CacheStats`] snapshot.
    async fn stats(&self) -> Result<CacheStats>;
}

/// Cache statistics
///
/// Plain data record returned by [`CacheStorage::stats`]. Cloneable and
/// (de)serializable via serde. The fields are independent; nothing in this
/// type keeps `hit_rate` consistent with `hits` and `misses`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CacheStats {
    /// Number of items in cache
    pub item_count: usize,
    /// Total cache size in bytes
    pub size_bytes: u64,
    /// Cache hit rate (0.0 - 1.0)
    // NOTE(review): presumably hits / (hits + misses); it is supplied by the
    // implementation rather than computed here.
    pub hit_rate: f64,
    /// Number of cache hits
    pub hits: u64,
    /// Number of cache misses
    pub misses: u64,
}

/// Trait for progress reporting implementations
///
/// Implementors must be `Send + Sync`, and every method takes `&self`, so
/// an implementation that tracks state has to use interior mutability.
/// All methods are infallible and return nothing except `is_enabled`.
pub trait ProgressReporter: Send + Sync {
    /// Start a new progress bar/indicator
    ///
    /// `total` is the expected amount of work and `message` an initial label.
    fn start(&self, total: u64, message: &str);

    /// Update progress
    ///
    /// `message` is optional; pass `None` to report progress without one.
    // NOTE(review): `current` is presumably an absolute position out of
    // `total` rather than an increment. Confirm with implementations.
    fn update(&self, current: u64, message: Option<&str>);

    /// Finish progress reporting
    fn finish(&self, message: &str);

    /// Report an error
    fn error(&self, message: &str);

    /// Report a warning
    fn warning(&self, message: &str);

    /// Check if progress reporting is enabled
    fn is_enabled(&self) -> bool;
}

/// Trait for language-specific analysis extensions
///
/// Implementors must be `Send + Sync`. The content-based methods receive
/// the source text and its language explicitly instead of reading files.
#[async_trait]
pub trait LanguageExtension: Send + Sync {
    /// Get supported languages
    fn supported_languages(&self) -> Vec<crate::file::Language>;

    /// Extract dependencies from file content
    // NOTE(review): the returned strings are presumably import/module
    // specifiers as written in the source, not resolved paths. Confirm.
    async fn extract_dependencies(
        &self,
        content: &str,
        language: crate::file::Language,
    ) -> Result<Vec<String>>;

    /// Detect if file is an entrypoint (main, index, etc.)
    async fn is_entrypoint(&self, file_info: &FileInfo) -> Result<bool>;

    /// Extract documentation/comments
    ///
    /// Returns the blocks found in `content` as [`DocumentationBlock`]s.
    async fn extract_documentation(
        &self,
        content: &str,
        language: crate::file::Language,
    ) -> Result<Vec<DocumentationBlock>>;

    /// Get extension priority (higher = preferred for language)
    ///
    /// Defaults to `0`, the lowest possible `u8` value.
    fn priority(&self) -> u8 {
        0
    }
}

/// Documentation block extracted from source code
///
/// Element type of the list returned by
/// [`LanguageExtension::extract_documentation`]. Cloneable and
/// (de)serializable via serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentationBlock {
    /// Documentation text
    // NOTE(review): whether comment delimiters are stripped from `text` is
    // up to the extractor; it is not specified here.
    pub text: String,
    /// Position in file
    pub position: crate::types::Range,
    /// Type of documentation (comment, docstring, etc.)
    pub doc_type: DocumentationType,
}

/// Types of documentation
///
/// Classifies a [`DocumentationBlock`]. Variants carry no data; the type
/// supports equality comparison and serde (de)serialization.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DocumentationType {
    /// Single line comment (// or #)
    LineComment,
    /// Block comment (/* */ or """ """)
    BlockComment,
    /// Documentation comment (/// or /** */)
    DocComment,
    /// Docstring (Python, etc.)
    Docstring,
    /// Module-level documentation
    ModuleDoc,
    /// README or markdown documentation
    Readme,
}

/// Trait for plugin registration and discovery
///
/// Implementors must be `Send + Sync`. Registration methods take ownership
/// of a boxed trait object and need `&mut self`; the getters hand back
/// borrowed trait objects that live as long as the borrow of the registry.
///
/// This trait exposes getters for analyzers, scorers and formatters only;
/// it has no getter for repository analyzers or language extensions.
pub trait PluginRegistry: Send + Sync {
    /// Register a file analyzer
    fn register_analyzer(&mut self, analyzer: Box<dyn FileAnalyzer>);

    /// Register a scorer
    fn register_scorer(&mut self, scorer: Box<dyn HeuristicScorer>);

    /// Register a repository analyzer
    fn register_repository_analyzer(&mut self, analyzer: Box<dyn RepositoryAnalyzer>);

    /// Register an output formatter
    fn register_formatter(&mut self, formatter: Box<dyn OutputFormatter>);

    /// Register a language extension
    fn register_language_extension(&mut self, extension: Box<dyn LanguageExtension>);

    /// Get registered analyzers
    fn get_analyzers(&self) -> Vec<&dyn FileAnalyzer>;

    /// Get registered scorers
    fn get_scorers(&self) -> Vec<&dyn HeuristicScorer>;

    /// Get registered formatters
    fn get_formatters(&self) -> Vec<&dyn OutputFormatter>;

    /// Load plugins from directory
    // NOTE(review): the `usize` is presumably the number of plugins loaded;
    // the plugin file format and discovery rules are not defined here.
    fn load_plugins_from_dir(&mut self, dir: &Path) -> Result<usize>;
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal analyzer that only provides real values for its metadata.
    struct MockAnalyzer;

    #[async_trait]
    impl FileAnalyzer for MockAnalyzer {
        // Never called by these tests; panics if it ever is.
        async fn analyze_file(&self, _file_path: &Path, _config: &Config) -> Result<FileInfo> {
            unimplemented!()
        }

        async fn analyze_content(&self, _file_info: &mut FileInfo, _config: &Config) -> Result<()> {
            Ok(())
        }

        fn name(&self) -> &'static str {
            "mock"
        }

        fn version(&self) -> &'static str {
            "1.0.0"
        }
    }

    /// The synchronous metadata accessors return the mock's fixed strings.
    #[test]
    fn mock_analyzer_metadata() {
        let mock = MockAnalyzer;
        let (name, version) = (mock.name(), mock.version());
        assert_eq!(name, "mock");
        assert_eq!(version, "1.0.0");
    }

    /// Two nodes joined by one edge show up in both adjacency lists and in the stats.
    #[test]
    fn test_dependency_graph() {
        let shared_metadata = DependencyNodeMetadata {
            path: "test.rs".to_string(),
            language: crate::file::Language::Rust,
            size: 100,
            is_test: false,
            is_entrypoint: false,
        };

        let mut graph = DependencyGraph::new();
        let from = graph.add_node("file1.rs".to_string(), shared_metadata.clone());
        let to = graph.add_node("file2.rs".to_string(), shared_metadata);
        graph.add_edge(from, to);

        // Structure: one outgoing edge on `from`, one incoming edge on `to`.
        assert_eq!(graph.nodes.len(), 2);
        assert_eq!(graph.edges[from].len(), 1);
        assert_eq!(graph.reverse_edges[to].len(), 1);

        // Aggregate view agrees with the structure.
        let summary = graph.stats();
        assert_eq!(summary.total_nodes, 2);
        assert_eq!(summary.total_edges, 1);
    }

    /// A documentation block keeps the text and type it was built with.
    #[test]
    fn test_documentation_block() {
        use crate::types::{Position, Range};

        let span = Range::new(Position::new(0, 0), Position::new(0, 23));
        let block = DocumentationBlock {
            text: "This is a test function".to_string(),
            position: span,
            doc_type: DocumentationType::DocComment,
        };

        assert_eq!(block.doc_type, DocumentationType::DocComment);
        assert!(block.text.contains("test function"));
    }
}