project_rag/types/
mod.rs

1use schemars::JsonSchema;
2use serde::{Deserialize, Serialize};
3
// Wire-format note: `path` is the only key without a serde default; each of the
// other fields has a `#[serde(default…)]` fallback. Range limits on these values
// are enforced by `IndexRequest::validate`, not by deserialization.
/// Request to index a codebase
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct IndexRequest {
    /// Path to the codebase directory to index
    pub path: String,
    /// Optional project name (for multi-project support)
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Optional glob patterns to include (e.g., ["**/*.rs", "**/*.toml"])
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub include_patterns: Vec<String>,
    /// Optional glob patterns to exclude (e.g., ["**/target/**", "**/node_modules/**"])
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub exclude_patterns: Vec<String>,
    /// Maximum file size in bytes to index (default: 1MB)
    // A missing key falls back to `default_max_file_size()`.
    #[serde(default = "default_max_file_size")]
    pub max_file_size: usize,
}
22
/// Serde fallback for `IndexRequest::max_file_size`: one mebibyte, in bytes.
fn default_max_file_size() -> usize {
    const BYTES_PER_MIB: usize = 1024 * 1024;
    BYTES_PER_MIB
}
26
// `rename_all = "lowercase"` makes the variants serialize as the strings
// "full" and "incremental".
/// Indexing mode used
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum IndexingMode {
    /// Full indexing (all files)
    Full,
    /// Incremental update (only changed files)
    Incremental,
}
36
// The first five fields have no serde default. `errors`, `files_updated` and
// `files_removed` fall back to their `Default` values (empty list / 0) when the
// key is missing.
/// Response from indexing operation
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct IndexResponse {
    /// Indexing mode used (full or incremental)
    pub mode: IndexingMode,
    /// Number of files successfully indexed
    pub files_indexed: usize,
    /// Number of code chunks created
    pub chunks_created: usize,
    /// Number of embeddings generated
    pub embeddings_generated: usize,
    /// Time taken in milliseconds
    pub duration_ms: u64,
    /// Any errors encountered (non-fatal)
    // One human-readable message per entry; empty when the key is missing.
    #[serde(default)]
    pub errors: Vec<String>,
    /// Number of files updated (incremental mode only)
    // Defaults to 0 when the key is missing.
    #[serde(default)]
    pub files_updated: usize,
    /// Number of files removed (incremental mode only)
    // Defaults to 0 when the key is missing.
    #[serde(default)]
    pub files_removed: usize,
}
60
// `query` is the only key without a serde default. The documented defaults come
// from the `default_*` helper functions named in the serde attributes; the
// accepted ranges are enforced by `QueryRequest::validate`, not by deserialization.
/// Request to query the codebase
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct QueryRequest {
    /// The question or search query
    pub query: String,
    /// Optional path to filter by specific indexed codebase
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional project name to filter by
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Number of results to return (default: 10)
    // A missing key falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    pub limit: usize,
    /// Minimum similarity score (0.0 to 1.0, default: 0.7)
    // A missing key falls back to `default_min_score()`.
    #[serde(default = "default_min_score")]
    pub min_score: f32,
    /// Enable hybrid search (vector + keyword) - default: true
    // A missing key falls back to `default_hybrid()`.
    #[serde(default = "default_hybrid")]
    pub hybrid: bool,
}
82
/// Serde fallback for `QueryRequest::hybrid`: hybrid search is on unless the
/// caller turns it off.
fn default_hybrid() -> bool {
    const HYBRID_ENABLED_BY_DEFAULT: bool = true;
    HYBRID_ENABLED_BY_DEFAULT
}
86
/// Serde fallback for the `limit` field of the search request types: ten results.
fn default_limit() -> usize {
    const DEFAULT_RESULT_COUNT: usize = 10;
    DEFAULT_RESULT_COUNT
}
90
/// Serde fallback for the `min_score` field of the search request types.
fn default_min_score() -> f32 {
    const DEFAULT_SIMILARITY_FLOOR: f32 = 0.7;
    DEFAULT_SIMILARITY_FLOOR
}
94
// Plain data carrier: nothing in this type enforces the score ranges or the
// line-number ordering described in the field docs; producers are responsible
// for that.
/// A single search result
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SearchResult {
    /// File path relative to the indexed root
    pub file_path: String,
    /// Absolute path to the indexed root directory
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub root_path: Option<String>,
    /// The code chunk content
    pub content: String,
    /// Combined similarity score (0.0 to 1.0)
    pub score: f32,
    /// Vector similarity score (0.0 to 1.0)
    pub vector_score: f32,
    /// Keyword match score (0.0 to 1.0) - only present in hybrid search
    pub keyword_score: Option<f32>,
    /// Starting line number in the file
    // NOTE(review): whether this is 0- or 1-based is not stated here; confirm with the producer.
    pub start_line: usize,
    /// Ending line number in the file
    // NOTE(review): inclusive vs. exclusive is not stated here; confirm with the producer.
    pub end_line: usize,
    /// Programming language detected
    pub language: String,
    /// Optional project name for multi-project support
    pub project: Option<String>,
}
120
// `threshold_used` and `threshold_lowered` carry `#[serde(default)]`, so payloads
// that omit them deserialize with 0.0 and `false` respectively.
/// Response from query operation
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct QueryResponse {
    /// List of search results, ordered by relevance
    pub results: Vec<SearchResult>,
    /// Time taken in milliseconds
    pub duration_ms: u64,
    /// The actual threshold used (may be lower than requested if adaptive search kicked in)
    // A missing key deserializes to 0.0 (the `f32` default).
    #[serde(default)]
    pub threshold_used: f32,
    /// Whether the threshold was automatically lowered to find results
    // A missing key deserializes to `false`.
    #[serde(default)]
    pub threshold_lowered: bool,
}
135
// Deliberately field-less: the request carries no parameters.
/// Request to get statistics about the index
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct StatisticsRequest {}
139
// No field has a serde default, so every key is required when deserializing.
/// Statistics about the indexed codebase
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct StatisticsResponse {
    /// Total number of indexed files
    pub total_files: usize,
    /// Total number of code chunks
    pub total_chunks: usize,
    /// Total number of embeddings
    pub total_embeddings: usize,
    /// Size of the vector database in bytes
    pub database_size_bytes: u64,
    /// Breakdown by programming language
    // One `LanguageStats` entry per language; ordering is not specified here.
    pub language_breakdown: Vec<LanguageStats>,
}
154
// Per-language entry of `StatisticsResponse::language_breakdown`.
// These notes are plain `//` comments on purpose: schemars copies `///` text into
// the generated JSON schema as descriptions, so adding `///` here would change the
// published schema.
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct LanguageStats {
    // Language name this entry describes.
    pub language: String,
    // Presumably the number of indexed files in this language — confirm with the producer.
    pub file_count: usize,
    // Presumably the number of chunks produced from those files — confirm with the producer.
    pub chunk_count: usize,
}
161
// Deliberately field-less: the request carries no parameters.
/// Request to clear the index
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ClearRequest {}
165
// Both fields are required on the wire: neither has a serde default.
/// Response from clear operation
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct ClearResponse {
    /// Whether the operation was successful
    pub success: bool,
    /// Optional message
    // NOTE(review): despite the wording above, the type is a plain `String`, so the
    // key must always be present (it may be empty). The `///` text is left as-is
    // because schemars publishes it as the schema description.
    pub message: String,
}
174
// Same shape as `IndexRequest` minus `max_file_size`. `path` is the only key
// without a serde default. No `validate` method for this type is visible in this file.
/// Request for incremental update
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct IncrementalUpdateRequest {
    /// Path to the codebase directory
    pub path: String,
    /// Optional project name
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Optional glob patterns to include
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub include_patterns: Vec<String>,
    /// Optional glob patterns to exclude
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub exclude_patterns: Vec<String>,
}
190
// No field has a serde default, so every key is required when deserializing.
/// Response from incremental update
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct IncrementalUpdateResponse {
    /// Number of files added
    pub files_added: usize,
    /// Number of files updated
    pub files_updated: usize,
    /// Number of files removed
    pub files_removed: usize,
    /// Number of chunks created/updated
    pub chunks_modified: usize,
    /// Time taken in milliseconds
    pub duration_ms: u64,
}
205
// Shares `query`/`path`/`project`/`limit`/`min_score` with `QueryRequest` but has
// no `hybrid` flag; `AdvancedSearchRequest::validate` builds a `QueryRequest` with
// `hybrid: true` to reuse its checks. That method also checks `file_extensions`
// and `languages`; `path` and `path_patterns` are not checked there.
/// Request to search with file type filters
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct AdvancedSearchRequest {
    /// The search query
    pub query: String,
    /// Optional path to filter by specific indexed codebase
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub path: Option<String>,
    /// Optional project name to filter by
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Number of results to return
    // A missing key falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    pub limit: usize,
    /// Minimum similarity score
    // A missing key falls back to `default_min_score()`.
    #[serde(default = "default_min_score")]
    pub min_score: f32,
    /// Filter by file extensions (e.g., ["rs", "toml"])
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub file_extensions: Vec<String>,
    /// Filter by programming languages
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub languages: Vec<String>,
    /// Filter by file path patterns (glob)
    // A missing key deserializes to an empty list.
    #[serde(default)]
    pub path_patterns: Vec<String>,
}
233
// `query` is the only key without a serde default. `SearchGitHistoryRequest::validate`
// checks `query`, `path`, `min_score`, `limit`, `max_commits` and `project`; the
// `branch`, `author`, `since`, `until` and `file_pattern` filters are not checked there.
/// Request to search git history
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SearchGitHistoryRequest {
    /// The search query
    pub query: String,
    /// Path to the codebase (will discover git repo)
    // A missing key falls back to `default_git_path()`, i.e. ".".
    #[serde(default = "default_git_path")]
    pub path: String,
    /// Optional project name
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Optional branch name (default: current branch)
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub branch: Option<String>,
    /// Maximum number of commits to index/search (default: 10)
    // A missing key falls back to `default_max_commits()`.
    #[serde(default = "default_max_commits")]
    pub max_commits: usize,
    /// Number of results to return (default: 10)
    // A missing key falls back to `default_limit()`.
    #[serde(default = "default_limit")]
    pub limit: usize,
    /// Minimum similarity score (0.0 to 1.0, default: 0.7)
    // A missing key falls back to `default_min_score()`.
    #[serde(default = "default_min_score")]
    pub min_score: f32,
    /// Filter by commit author (optional regex pattern)
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub author: Option<String>,
    /// Filter by commits since this date (ISO 8601 or Unix timestamp)
    // Kept as a raw string; parsing happens outside this type.
    #[serde(default)]
    pub since: Option<String>,
    /// Filter by commits until this date (ISO 8601 or Unix timestamp)
    // Kept as a raw string; parsing happens outside this type.
    #[serde(default)]
    pub until: Option<String>,
    /// Filter by file path pattern (optional regex)
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub file_pattern: Option<String>,
}
270
/// Serde fallback for `SearchGitHistoryRequest::path`: the current directory.
fn default_git_path() -> String {
    String::from(".")
}
274
/// Serde fallback for `SearchGitHistoryRequest::max_commits`: ten commits.
fn default_max_commits() -> usize {
    const DEFAULT_COMMIT_WINDOW: usize = 10;
    DEFAULT_COMMIT_WINDOW
}
278
// Plain data carrier; no field has a serde default and nothing in this type
// enforces the documented score range or snippet length.
/// A single git search result
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GitSearchResult {
    /// Git commit hash (SHA)
    pub commit_hash: String,
    /// Commit message
    pub commit_message: String,
    /// Author name
    pub author: String,
    /// Author email
    pub author_email: String,
    /// Commit date (Unix timestamp)
    // NOTE(review): presumably seconds since the epoch — confirm with the producer.
    pub commit_date: i64,
    /// Combined similarity score (0.0 to 1.0)
    pub score: f32,
    /// Vector similarity score
    pub vector_score: f32,
    /// Keyword match score (if hybrid search enabled)
    pub keyword_score: Option<f32>,
    /// Files changed in this commit
    pub files_changed: Vec<String>,
    /// Diff snippet (first ~500 characters)
    pub diff_snippet: String,
}
303
// No field has a serde default, so every key is required when deserializing.
/// Response from git history search
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct SearchGitHistoryResponse {
    /// List of matching commits, ordered by relevance
    pub results: Vec<GitSearchResult>,
    /// Number of commits indexed during this search
    pub commits_indexed: usize,
    /// Total commits in cache for this repo
    pub total_cached_commits: usize,
    /// Time taken in milliseconds
    pub duration_ms: u64,
}
316
317// ============================================================================
318// Code Relations Types (find_definition, find_references, get_call_graph)
319// ============================================================================
320
// `file_path`, `line` and `column` have no serde default. `FindDefinitionRequest::validate`
// rejects an empty `file_path` and `line == 0`; `column` and `project` are not checked there.
/// Request to find the definition of a symbol at a given location
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FindDefinitionRequest {
    /// File path (relative or absolute)
    pub file_path: String,
    /// Line number (1-based)
    pub line: usize,
    /// Column number (0-based)
    pub column: usize,
    /// Optional project name to filter by
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
}
334
335impl FindDefinitionRequest {
336    /// Validate the find definition request
337    pub fn validate(&self) -> Result<(), String> {
338        if self.file_path.is_empty() {
339            return Err("file_path cannot be empty".to_string());
340        }
341        if self.line == 0 {
342            return Err("line must be 1-based (cannot be 0)".to_string());
343        }
344        Ok(())
345    }
346}
347
// `precision` is a free-form string here; the set of values it can take is
// defined by whoever produces the response, not by this type.
/// Response from find_definition
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FindDefinitionResponse {
    /// The found definition, if any
    pub definition: Option<crate::relations::DefinitionResult>,
    /// Precision level of the result
    pub precision: String,
    /// Time taken in milliseconds
    pub duration_ms: u64,
}
358
// `file_path`, `line` and `column` have no serde default. `FindReferencesRequest::validate`
// rejects an empty `file_path`, `line == 0`, and `limit` above 10000.
/// Request to find all references to a symbol
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FindReferencesRequest {
    /// File path (relative or absolute)
    pub file_path: String,
    /// Line number (1-based)
    pub line: usize,
    /// Column number (0-based)
    pub column: usize,
    /// Maximum number of references to return
    // A missing key falls back to `default_references_limit()` (100).
    #[serde(default = "default_references_limit")]
    pub limit: usize,
    /// Optional project name to filter by
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Include the definition itself in results
    // A missing key falls back to `default_include_definition()` (true).
    #[serde(default = "default_include_definition")]
    pub include_definition: bool,
}
378
/// Serde fallback for `FindReferencesRequest::limit`: one hundred references.
fn default_references_limit() -> usize {
    const DEFAULT_REFERENCE_COUNT: usize = 100;
    DEFAULT_REFERENCE_COUNT
}
382
/// Serde fallback for `FindReferencesRequest::include_definition`: the definition
/// is included unless the caller opts out.
fn default_include_definition() -> bool {
    const INCLUDE_BY_DEFAULT: bool = true;
    INCLUDE_BY_DEFAULT
}
386
387impl FindReferencesRequest {
388    /// Validate the find references request
389    pub fn validate(&self) -> Result<(), String> {
390        if self.file_path.is_empty() {
391            return Err("file_path cannot be empty".to_string());
392        }
393        if self.line == 0 {
394            return Err("line must be 1-based (cannot be 0)".to_string());
395        }
396        const MAX_LIMIT: usize = 10000;
397        if self.limit > MAX_LIMIT {
398            return Err(format!("limit too large: {} (max: {})", self.limit, MAX_LIMIT));
399        }
400        Ok(())
401    }
402}
403
// `precision` is a free-form string here; its possible values are defined by the
// producer of the response, not by this type.
/// Response from find_references
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct FindReferencesResponse {
    /// The symbol being referenced
    pub symbol_name: Option<String>,
    /// List of found references
    pub references: Vec<crate::relations::ReferenceResult>,
    /// Total count (may be higher than returned if limit applied)
    // May therefore exceed `references.len()`.
    pub total_count: usize,
    /// Precision level of the results
    pub precision: String,
    /// Time taken in milliseconds
    pub duration_ms: u64,
}
418
// `file_path`, `line` and `column` have no serde default. `GetCallGraphRequest::validate`
// rejects an empty `file_path`, `line == 0`, and `depth` above 10.
/// Request to get call graph for a function
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GetCallGraphRequest {
    /// File path (relative or absolute)
    pub file_path: String,
    /// Line number (1-based)
    pub line: usize,
    /// Column number (0-based)
    pub column: usize,
    /// Maximum depth to traverse (default: 2)
    // A missing key falls back to `default_call_graph_depth()`.
    #[serde(default = "default_call_graph_depth")]
    pub depth: usize,
    /// Optional project name to filter by
    // A missing key deserializes to `None`.
    #[serde(default)]
    pub project: Option<String>,
    /// Include callers (functions that call this function)
    // A missing key falls back to `default_true()`.
    #[serde(default = "default_true")]
    pub include_callers: bool,
    /// Include callees (functions this function calls)
    // A missing key falls back to `default_true()`.
    #[serde(default = "default_true")]
    pub include_callees: bool,
}
441
/// Serde fallback for `GetCallGraphRequest::depth`: two levels.
fn default_call_graph_depth() -> usize {
    const DEFAULT_TRAVERSAL_DEPTH: usize = 2;
    DEFAULT_TRAVERSAL_DEPTH
}
445
/// Serde fallback for boolean fields that are enabled unless the caller opts out
/// (`GetCallGraphRequest::include_callers` and `include_callees`).
fn default_true() -> bool {
    const ENABLED: bool = true;
    ENABLED
}
449
450impl GetCallGraphRequest {
451    /// Validate the get call graph request
452    pub fn validate(&self) -> Result<(), String> {
453        if self.file_path.is_empty() {
454            return Err("file_path cannot be empty".to_string());
455        }
456        if self.line == 0 {
457            return Err("line must be 1-based (cannot be 0)".to_string());
458        }
459        const MAX_DEPTH: usize = 10;
460        if self.depth > MAX_DEPTH {
461            return Err(format!("depth too large: {} (max: {})", self.depth, MAX_DEPTH));
462        }
463        Ok(())
464    }
465}
466
// `precision` is a free-form string here; its possible values are defined by the
// producer of the response, not by this type.
/// Response from get_call_graph
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
pub struct GetCallGraphResponse {
    /// The root symbol (function/method at the requested location)
    // `None` presumably means no symbol was resolved at the location — confirm with the producer.
    pub root_symbol: Option<crate::relations::SymbolInfo>,
    /// Functions/methods that call this symbol (incoming calls)
    pub callers: Vec<crate::relations::CallGraphNode>,
    /// Functions/methods called by this symbol (outgoing calls)
    pub callees: Vec<crate::relations::CallGraphNode>,
    /// Precision level of the results
    pub precision: String,
    /// Time taken in milliseconds
    pub duration_ms: u64,
}
481
/// Metadata stored with each code chunk.
///
/// Unlike the request/response types in this module, this struct does not derive
/// `JsonSchema`; it is only (de)serialized through serde.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkMetadata {
    /// File path relative to indexed root
    pub file_path: String,
    /// Absolute path to the indexed root directory.
    ///
    /// Explicitly marked `#[serde(default)]`, so records that lack the key
    /// deserialize with `None`.
    #[serde(default)]
    pub root_path: Option<String>,
    /// Project name (for multi-project support)
    pub project: Option<String>,
    /// Starting line number of the chunk within the file
    // NOTE(review): 0- vs 1-based is not established in this file — confirm with the chunker.
    pub start_line: usize,
    /// Ending line number of the chunk within the file
    // NOTE(review): inclusive vs. exclusive is not established in this file — confirm with the chunker.
    pub end_line: usize,
    /// Programming language, when one was detected
    pub language: Option<String>,
    /// File extension, when the file has one
    pub extension: Option<String>,
    /// SHA256 hash of the file content
    // NOTE(review): presumably hex-encoded; the encoding is not shown here.
    pub file_hash: String,
    /// Timestamp when indexed
    // NOTE(review): presumably a Unix timestamp; the unit is not shown here.
    pub indexed_at: i64,
}
505
506/// Input validation for request types
507///
508/// These functions validate user inputs to prevent security issues and ensure
509/// reasonable resource usage.
510impl IndexRequest {
511    /// Validate the index request
512    pub fn validate(&self) -> Result<(), String> {
513        // Validate path exists and is a directory
514        let path = std::path::Path::new(&self.path);
515        if !path.exists() {
516            return Err(format!("Path does not exist: {}", self.path));
517        }
518        if !path.is_dir() {
519            return Err(format!("Path is not a directory: {}", self.path));
520        }
521
522        // Canonicalize to prevent path traversal attacks
523        let canonical = path
524            .canonicalize()
525            .map_err(|e| format!("Failed to canonicalize path: {}", e))?;
526
527        // Check that path doesn't try to escape (basic security check)
528        if !canonical.starts_with(
529            std::env::current_dir()
530                .unwrap_or_default()
531                .parent()
532                .unwrap_or(std::path::Path::new("/")),
533        ) {
534            // Allow any absolute path, this check is just to catch obvious traversal attempts
535        }
536
537        // Validate max_file_size is reasonable (max 100MB)
538        const MAX_FILE_SIZE_LIMIT: usize = 100_000_000; // 100MB
539        if self.max_file_size > MAX_FILE_SIZE_LIMIT {
540            return Err(format!(
541                "max_file_size too large: {} bytes (max: {} bytes)",
542                self.max_file_size, MAX_FILE_SIZE_LIMIT
543            ));
544        }
545
546        // Validate project name if provided
547        if let Some(ref project) = self.project {
548            if project.is_empty() {
549                return Err("project name cannot be empty".to_string());
550            }
551            if project.len() > 256 {
552                return Err("project name too long (max 256 characters)".to_string());
553            }
554        }
555
556        Ok(())
557    }
558}
559
560impl QueryRequest {
561    /// Validate the query request
562    pub fn validate(&self) -> Result<(), String> {
563        // Validate query is not empty
564        if self.query.trim().is_empty() {
565            return Err("query cannot be empty".to_string());
566        }
567
568        // Validate query length is reasonable (max 10KB)
569        const MAX_QUERY_LENGTH: usize = 10_240; // 10KB
570        if self.query.len() > MAX_QUERY_LENGTH {
571            return Err(format!(
572                "query too long: {} bytes (max: {} bytes)",
573                self.query.len(),
574                MAX_QUERY_LENGTH
575            ));
576        }
577
578        // Validate min_score is in valid range [0.0, 1.0]
579        if !(0.0..=1.0).contains(&self.min_score) {
580            return Err(format!(
581                "min_score must be between 0.0 and 1.0, got: {}",
582                self.min_score
583            ));
584        }
585
586        // Validate limit is reasonable (max 1000)
587        const MAX_LIMIT: usize = 1000;
588        if self.limit > MAX_LIMIT {
589            return Err(format!(
590                "limit too large: {} (max: {})",
591                self.limit, MAX_LIMIT
592            ));
593        }
594
595        // Validate project name if provided
596        if let Some(ref project) = self.project {
597            if project.is_empty() {
598                return Err("project name cannot be empty".to_string());
599            }
600            if project.len() > 256 {
601                return Err("project name too long (max 256 characters)".to_string());
602            }
603        }
604
605        Ok(())
606    }
607}
608
609impl AdvancedSearchRequest {
610    /// Validate the advanced search request
611    pub fn validate(&self) -> Result<(), String> {
612        // Reuse QueryRequest validation logic
613        let query_req = QueryRequest {
614            query: self.query.clone(),
615            path: None,
616            project: self.project.clone(),
617            limit: self.limit,
618            min_score: self.min_score,
619            hybrid: true,
620        };
621        query_req.validate()?;
622
623        // Additional validation for file extensions
624        for ext in &self.file_extensions {
625            if ext.is_empty() {
626                return Err("file extension cannot be empty".to_string());
627            }
628            if ext.len() > 20 {
629                return Err(format!(
630                    "file extension too long: {} (max 20 characters)",
631                    ext
632                ));
633            }
634        }
635
636        // Validate languages
637        for lang in &self.languages {
638            if lang.is_empty() {
639                return Err("language name cannot be empty".to_string());
640            }
641            if lang.len() > 50 {
642                return Err(format!(
643                    "language name too long: {} (max 50 characters)",
644                    lang
645                ));
646            }
647        }
648
649        Ok(())
650    }
651}
652
653impl SearchGitHistoryRequest {
654    /// Validate the git history search request
655    pub fn validate(&self) -> Result<(), String> {
656        // Validate query
657        if self.query.trim().is_empty() {
658            return Err("query cannot be empty".to_string());
659        }
660
661        const MAX_QUERY_LENGTH: usize = 10_240; // 10KB
662        if self.query.len() > MAX_QUERY_LENGTH {
663            return Err(format!(
664                "query too long: {} bytes (max: {} bytes)",
665                self.query.len(),
666                MAX_QUERY_LENGTH
667            ));
668        }
669
670        // Validate path
671        let path = std::path::Path::new(&self.path);
672        if !path.exists() {
673            return Err(format!("Path does not exist: {}", self.path));
674        }
675
676        // Validate min_score range
677        if !(0.0..=1.0).contains(&self.min_score) {
678            return Err(format!(
679                "min_score must be between 0.0 and 1.0, got: {}",
680                self.min_score
681            ));
682        }
683
684        // Validate limit
685        const MAX_LIMIT: usize = 1000;
686        if self.limit > MAX_LIMIT {
687            return Err(format!(
688                "limit too large: {} (max: {})",
689                self.limit, MAX_LIMIT
690            ));
691        }
692
693        // Validate max_commits
694        const MAX_COMMITS_LIMIT: usize = 10000;
695        if self.max_commits > MAX_COMMITS_LIMIT {
696            return Err(format!(
697                "max_commits too large: {} (max: {})",
698                self.max_commits, MAX_COMMITS_LIMIT
699            ));
700        }
701
702        // Validate project name if provided
703        if let Some(ref project) = self.project {
704            if project.is_empty() {
705                return Err("project name cannot be empty".to_string());
706            }
707            if project.len() > 256 {
708                return Err("project name too long (max 256 characters)".to_string());
709            }
710        }
711
712        Ok(())
713    }
714}
715
716#[cfg(test)]
717mod tests;