Skip to main content

st/
registry.rs

1//! SmartPastCode Registry Integration
2//!
3//! This module provides integration with the SmartPastCode universal code registry,
4//! enabling automatic indexing of Rust projects and their components.
5
6use anyhow::{Context, Result};
7use reqwest::blocking::Client;
8use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11use std::time::Instant;
12use syn::{File, Item, ItemFn, ItemImpl, ItemMod};
13
14/// Component metadata for SmartPastCode registry
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct CodeComponent {
17    /// Unique identifier (content hash)
18    pub id: String,
19
20    /// Component type
21    pub component_type: ComponentType,
22
23    /// The actual Rust code
24    pub content: String,
25
26    /// Discovery metadata
27    pub discovery_metadata: DiscoveryMetadata,
28
29    /// Origin information
30    pub origin: ComponentOrigin,
31
32    /// Security clearance level
33    pub clearance: ClearanceLevel,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37#[serde(rename_all = "PascalCase")]
38pub enum ComponentType {
39    Function,
40    Module,
41    Class,
42    MiniCrate,
43    Folder,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DiscoveryMetadata {
48    /// Language (always "rust")
49    pub language: String,
50
51    /// Domain (networking, database, etc.)
52    pub domains: Vec<String>,
53
54    /// Purpose (authentication, parsing, etc.)
55    pub purposes: Vec<String>,
56
57    /// Keywords extracted from code
58    pub keywords: Vec<String>,
59
60    /// Is async code?
61    pub is_async: bool,
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct ComponentOrigin {
66    /// Project path
67    pub project_path: String,
68
69    /// File path
70    pub file_path: String,
71
72    /// Line number
73    pub line_number: usize,
74
75    /// Contributor (AI or human)
76    pub contributor: String,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80#[serde(rename_all = "PascalCase")]
81pub enum ClearanceLevel {
82    Private = 0,
83    Team = 1,
84    Internal = 2,
85    CompanyPublic = 3,
86    WorldPublic = 10,
87}
88
89/// Marine-inspired code analyzer for Rust
90pub struct MarineCodeAnalyzer {
91    client: Client,
92    registry_url: String,
93    contributor: String,
94}
95
96impl MarineCodeAnalyzer {
97    /// Create a new analyzer
98    pub fn new(registry_url: &str) -> Result<Self> {
99        let mut headers = HeaderMap::new();
100
101        if let Ok(token) = std::env::var("ST_ROOT_TOKEN") {
102            let mut auth_value = HeaderValue::try_from(token)
103                .context("Invalid characters in ST_ROOT_TOKEN")?;
104            auth_value.set_sensitive(true);
105            let header_name = HeaderName::from_static("x-api-key");
106            headers.insert(header_name, auth_value);
107        }
108
109        let client = Client::builder()
110            .timeout(std::time::Duration::from_secs(30))
111            .default_headers(headers)
112            .build()
113            .context("Failed to create HTTP client")?;
114
115        let contributor = whoami::username();
116
117        Ok(Self {
118            client,
119            registry_url: registry_url.to_string(),
120            contributor,
121        })
122    }
123
124    /// Index a Rust file
125    pub fn index_file(&self, file_path: &Path, project_path: &Path) -> Result<Vec<CodeComponent>> {
126        let content = std::fs::read_to_string(file_path).context("Failed to read file")?;
127
128        let syntax: File = syn::parse_file(&content).context("Failed to parse Rust file")?;
129
130        let mut components = Vec::new();
131
132        // Extract functions and impl blocks
133        for item in &syntax.items {
134            match item {
135                Item::Fn(func) => {
136                    if let Some(component) =
137                        self.extract_function(func, file_path, project_path, &content)
138                    {
139                        components.push(component);
140                    }
141                }
142                Item::Impl(impl_block) => {
143                    for component in
144                        self.extract_impl_methods(impl_block, file_path, project_path, &content)
145                    {
146                        components.push(component);
147                    }
148                }
149                Item::Mod(module) => {
150                    if let Some(component) =
151                        self.extract_module(module, file_path, project_path, &content)
152                    {
153                        components.push(component);
154                    }
155                }
156                _ => {}
157            }
158        }
159
160        Ok(components)
161    }
162
163    /// Extract a function as a component
164    fn extract_function(
165        &self,
166        func: &ItemFn,
167        file_path: &Path,
168        project_path: &Path,
169        full_content: &str,
170    ) -> Option<CodeComponent> {
171        let _func_name = func.sig.ident.to_string();
172
173        // Extract the function code
174        let func_code = quote::quote!(#func).to_string();
175
176        // Get line number
177        let line_number = self.get_line_number(full_content, &func_code);
178
179        // Analyze metadata
180        let metadata = self.analyze_metadata(&func_code, &func.sig);
181
182        // Generate ID from content hash
183        let id = self.generate_id(&func_code);
184
185        Some(CodeComponent {
186            id,
187            component_type: ComponentType::Function,
188            content: func_code,
189            discovery_metadata: metadata,
190            origin: ComponentOrigin {
191                project_path: project_path.display().to_string(),
192                file_path: file_path.display().to_string(),
193                line_number,
194                contributor: self.contributor.clone(),
195            },
196            clearance: ClearanceLevel::WorldPublic,
197        })
198    }
199
200    /// Extract methods from impl blocks
201    fn extract_impl_methods(
202        &self,
203        impl_block: &ItemImpl,
204        file_path: &Path,
205        project_path: &Path,
206        full_content: &str,
207    ) -> Vec<CodeComponent> {
208        let mut components = Vec::new();
209
210        for item in &impl_block.items {
211            if let syn::ImplItem::Fn(method) = item {
212                let method_code = quote::quote!(#method).to_string();
213                let line_number = self.get_line_number(full_content, &method_code);
214                let metadata = self.analyze_metadata(&method_code, &method.sig);
215                let id = self.generate_id(&method_code);
216
217                components.push(CodeComponent {
218                    id,
219                    component_type: ComponentType::Function,
220                    content: method_code,
221                    discovery_metadata: metadata,
222                    origin: ComponentOrigin {
223                        project_path: project_path.display().to_string(),
224                        file_path: file_path.display().to_string(),
225                        line_number,
226                        contributor: self.contributor.clone(),
227                    },
228                    clearance: ClearanceLevel::WorldPublic,
229                });
230            }
231        }
232
233        components
234    }
235
236    /// Extract module as a component
237    fn extract_module(
238        &self,
239        module: &ItemMod,
240        file_path: &Path,
241        project_path: &Path,
242        full_content: &str,
243    ) -> Option<CodeComponent> {
244        let module_code = quote::quote!(#module).to_string();
245        let line_number = self.get_line_number(full_content, &module_code);
246        let id = self.generate_id(&module_code);
247
248        // Simple metadata for modules
249        let metadata = DiscoveryMetadata {
250            language: "rust".to_string(),
251            domains: vec![],
252            purposes: vec!["module".to_string()],
253            keywords: vec![module.ident.to_string()],
254            is_async: false,
255        };
256
257        Some(CodeComponent {
258            id,
259            component_type: ComponentType::Module,
260            content: module_code,
261            discovery_metadata: metadata,
262            origin: ComponentOrigin {
263                project_path: project_path.display().to_string(),
264                file_path: file_path.display().to_string(),
265                line_number,
266                contributor: self.contributor.clone(),
267            },
268            clearance: ClearanceLevel::WorldPublic,
269        })
270    }
271
272    /// Analyze code metadata
273    fn analyze_metadata(&self, code: &str, sig: &syn::Signature) -> DiscoveryMetadata {
274        let mut domains = Vec::new();
275        let mut purposes = Vec::new();
276        let mut keywords = Vec::new();
277
278        // Detect async
279        let is_async = sig.asyncness.is_some();
280        if is_async {
281            keywords.push("async".to_string());
282        }
283
284        // Detect domains from common imports
285        if code.contains("tokio::net") || code.contains("async_std::net") {
286            domains.push("networking".to_string());
287        }
288        if code.contains("sqlx") || code.contains("diesel") || code.contains("rusqlite") {
289            domains.push("database".to_string());
290        }
291        if code.contains("serde") || code.contains("serde_json") {
292            domains.push("serialization".to_string());
293        }
294        if code.contains("reqwest") || code.contains("hyper") {
295            domains.push("http".to_string());
296        }
297        if code.contains("tokio::fs") || code.contains("std::fs") {
298            domains.push("filesystem".to_string());
299        }
300
301        // Detect purposes from function names
302        let func_name = sig.ident.to_string().to_lowercase();
303        if func_name.contains("parse") {
304            purposes.push("parsing".to_string());
305        }
306        if func_name.contains("validate") || func_name.contains("check") {
307            purposes.push("validation".to_string());
308        }
309        if func_name.contains("auth") || func_name.contains("login") {
310            purposes.push("authentication".to_string());
311        }
312        if func_name.contains("download") || func_name.contains("upload") {
313            purposes.push("transfer".to_string());
314        }
315        if func_name.contains("process") || func_name.contains("handle") {
316            purposes.push("processing".to_string());
317        }
318
319        // Add function name as keyword
320        keywords.push(sig.ident.to_string());
321
322        DiscoveryMetadata {
323            language: "rust".to_string(),
324            domains,
325            purposes,
326            keywords,
327            is_async,
328        }
329    }
330
331    /// Get line number of code in full content
332    fn get_line_number(&self, full_content: &str, code: &str) -> usize {
333        // Simple heuristic: count newlines before first occurrence
334        let first_line = code.lines().next().unwrap_or("");
335        if let Some(pos) = full_content.find(first_line) {
336            full_content[..pos].lines().count() + 1
337        } else {
338            1
339        }
340    }
341
342    /// Generate ID from content hash
343    fn generate_id(&self, content: &str) -> String {
344        use sha2::{Digest, Sha256};
345        let mut hasher = Sha256::new();
346        hasher.update(content.as_bytes());
347        let result = hasher.finalize();
348        hex::encode(result)
349    }
350
351    /// Submit component to registry
352    pub fn submit_component(&self, component: &CodeComponent) -> Result<()> {
353        let url = format!("{}/components/store", self.registry_url);
354
355        let response = self
356            .client
357            .post(&url)
358            .json(component)
359            .send()
360            .context("Failed to send component to registry")?;
361
362        if !response.status().is_success() {
363            anyhow::bail!(
364                "Registry returned error: {} - {}",
365                response.status(),
366                response.text().unwrap_or_default()
367            );
368        }
369
370        Ok(())
371    }
372
373    /// Batch submit components
374    pub fn submit_batch(&self, components: &[CodeComponent]) -> Result<BatchResult> {
375        let start = Instant::now();
376        let mut success_count = 0;
377        let mut error_count = 0;
378        let mut errors = Vec::new();
379
380        for component in components {
381            match self.submit_component(component) {
382                Ok(_) => success_count += 1,
383                Err(e) => {
384                    error_count += 1;
385                    errors.push(format!("{}: {}", component.origin.file_path, e));
386                }
387            }
388        }
389
390        Ok(BatchResult {
391            total: components.len(),
392            success: success_count,
393            errors: error_count,
394            error_messages: errors,
395            duration: start.elapsed(),
396        })
397    }
398}
399
/// Outcome of submitting a batch of components to the registry.
#[derive(Debug)]
pub struct BatchResult {
    /// Number of components in the batch.
    pub total: usize,
    /// Number of components submitted successfully.
    pub success: usize,
    /// Number of components whose submission failed.
    pub errors: usize,
    /// One message per failed submission.
    pub error_messages: Vec<String>,
    /// Time spent submitting the batch.
    pub duration: std::time::Duration,
}
409
410/// Registry indexer for project scanning
411pub struct RegistryIndexer {
412    analyzer: MarineCodeAnalyzer,
413}
414
415impl RegistryIndexer {
416    pub fn new(registry_url: &str) -> Result<Self> {
417        Ok(Self {
418            analyzer: MarineCodeAnalyzer::new(registry_url)?,
419        })
420    }
421
422    /// Index a project directory
423    pub fn index_project(&self, project_path: &Path) -> Result<IndexingStats> {
424        let start = Instant::now();
425        let mut all_components = Vec::new();
426        let mut files_processed = 0;
427        let mut files_skipped = 0;
428
429        // Find all .rs files
430        let rust_files = self.find_rust_files(project_path)?;
431
432        for file_path in rust_files {
433            match self.analyzer.index_file(&file_path, project_path) {
434                Ok(components) => {
435                    files_processed += 1;
436                    all_components.extend(components);
437                }
438                Err(e) => {
439                    eprintln!("Warning: Failed to index {}: {}", file_path.display(), e);
440                    files_skipped += 1;
441                }
442            }
443        }
444
445        // Submit to registry
446        let batch_result = self.analyzer.submit_batch(&all_components)?;
447
448        Ok(IndexingStats {
449            project_path: project_path.to_path_buf(),
450            files_processed,
451            files_skipped,
452            functions_indexed: all_components.len(),
453            duration: start.elapsed(),
454            batch_result,
455        })
456    }
457
458    /// Find all Rust files in project
459    fn find_rust_files(&self, project_path: &Path) -> Result<Vec<PathBuf>> {
460        let mut files = Vec::new();
461
462        for entry in walkdir::WalkDir::new(project_path)
463            .follow_links(false)
464            .into_iter()
465            .filter_entry(|e| {
466                // Skip common ignore patterns
467                let name = e.file_name().to_str().unwrap_or("");
468                !matches!(name, "target" | "node_modules" | ".git" | "dist" | "build")
469            })
470            .filter_map(|e| e.ok())
471        {
472            if entry.file_type().is_file() {
473                if let Some(ext) = entry.path().extension() {
474                    if ext == "rs" {
475                        files.push(entry.path().to_path_buf());
476                    }
477                }
478            }
479        }
480
481        Ok(files)
482    }
483}
484
485/// Statistics from indexing operation
486#[derive(Debug)]
487pub struct IndexingStats {
488    pub project_path: PathBuf,
489    pub files_processed: usize,
490    pub files_skipped: usize,
491    pub functions_indexed: usize,
492    pub duration: std::time::Duration,
493    pub batch_result: BatchResult,
494}
495
496impl IndexingStats {
497    /// Print summary to stdout
498    pub fn print_summary(&self) {
499        println!("\n╔══════════════════════════════════════════════════════════╗");
500        println!("║     SmartPastCode Registry Indexing Summary             ║");
501        println!("╚══════════════════════════════════════════════════════════╝");
502        println!();
503        println!("Project: {}", self.project_path.display());
504        println!();
505        println!("Files Processed:      {}", self.files_processed);
506        println!("Files Skipped:        {}", self.files_skipped);
507        println!("Functions Indexed:    {}", self.functions_indexed);
508        println!();
509        println!("Registry Submission:");
510        println!("  Total:              {}", self.batch_result.total);
511        println!("  Success:            {}", self.batch_result.success);
512        println!("  Errors:             {}", self.batch_result.errors);
513        println!();
514        println!("Performance:");
515        println!("  Total Duration:     {:.2}s", self.duration.as_secs_f64());
516        println!(
517            "  Indexing Speed:     {:.1} functions/sec",
518            self.functions_indexed as f64 / self.duration.as_secs_f64()
519        );
520
521        if !self.batch_result.error_messages.is_empty() {
522            println!();
523            println!("Errors:");
524            for (i, error) in self.batch_result.error_messages.iter().take(5).enumerate() {
525                println!("  {}. {}", i + 1, error);
526            }
527            if self.batch_result.error_messages.len() > 5 {
528                println!(
529                    "  ... and {} more",
530                    self.batch_result.error_messages.len() - 5
531                );
532            }
533        }
534
535        println!();
536    }
537}