1use anyhow::{Context, Result};
7use reqwest::blocking::Client;
8use reqwest::header::{HeaderMap, HeaderName, HeaderValue};
9use serde::{Deserialize, Serialize};
10use std::path::{Path, PathBuf};
11use std::time::Instant;
12use syn::{File, Item, ItemFn, ItemImpl, ItemMod};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct CodeComponent {
17 pub id: String,
19
20 pub component_type: ComponentType,
22
23 pub content: String,
25
26 pub discovery_metadata: DiscoveryMetadata,
28
29 pub origin: ComponentOrigin,
31
32 pub clearance: ClearanceLevel,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
37#[serde(rename_all = "PascalCase")]
38pub enum ComponentType {
39 Function,
40 Module,
41 Class,
42 MiniCrate,
43 Folder,
44}
45
46#[derive(Debug, Clone, Serialize, Deserialize)]
47pub struct DiscoveryMetadata {
48 pub language: String,
50
51 pub domains: Vec<String>,
53
54 pub purposes: Vec<String>,
56
57 pub keywords: Vec<String>,
59
60 pub is_async: bool,
62}
63
64#[derive(Debug, Clone, Serialize, Deserialize)]
65pub struct ComponentOrigin {
66 pub project_path: String,
68
69 pub file_path: String,
71
72 pub line_number: usize,
74
75 pub contributor: String,
77}
78
79#[derive(Debug, Clone, Serialize, Deserialize)]
80#[serde(rename_all = "PascalCase")]
81pub enum ClearanceLevel {
82 Private = 0,
83 Team = 1,
84 Internal = 2,
85 CompanyPublic = 3,
86 WorldPublic = 10,
87}
88
89pub struct MarineCodeAnalyzer {
91 client: Client,
92 registry_url: String,
93 contributor: String,
94}
95
96impl MarineCodeAnalyzer {
97 pub fn new(registry_url: &str) -> Result<Self> {
99 let mut headers = HeaderMap::new();
100
101 if let Ok(token) = std::env::var("ST_ROOT_TOKEN") {
102 let mut auth_value = HeaderValue::try_from(token)
103 .context("Invalid characters in ST_ROOT_TOKEN")?;
104 auth_value.set_sensitive(true);
105 let header_name = HeaderName::from_static("x-api-key");
106 headers.insert(header_name, auth_value);
107 }
108
109 let client = Client::builder()
110 .timeout(std::time::Duration::from_secs(30))
111 .default_headers(headers)
112 .build()
113 .context("Failed to create HTTP client")?;
114
115 let contributor = whoami::username();
116
117 Ok(Self {
118 client,
119 registry_url: registry_url.to_string(),
120 contributor,
121 })
122 }
123
124 pub fn index_file(&self, file_path: &Path, project_path: &Path) -> Result<Vec<CodeComponent>> {
126 let content = std::fs::read_to_string(file_path).context("Failed to read file")?;
127
128 let syntax: File = syn::parse_file(&content).context("Failed to parse Rust file")?;
129
130 let mut components = Vec::new();
131
132 for item in &syntax.items {
134 match item {
135 Item::Fn(func) => {
136 if let Some(component) =
137 self.extract_function(func, file_path, project_path, &content)
138 {
139 components.push(component);
140 }
141 }
142 Item::Impl(impl_block) => {
143 for component in
144 self.extract_impl_methods(impl_block, file_path, project_path, &content)
145 {
146 components.push(component);
147 }
148 }
149 Item::Mod(module) => {
150 if let Some(component) =
151 self.extract_module(module, file_path, project_path, &content)
152 {
153 components.push(component);
154 }
155 }
156 _ => {}
157 }
158 }
159
160 Ok(components)
161 }
162
163 fn extract_function(
165 &self,
166 func: &ItemFn,
167 file_path: &Path,
168 project_path: &Path,
169 full_content: &str,
170 ) -> Option<CodeComponent> {
171 let _func_name = func.sig.ident.to_string();
172
173 let func_code = quote::quote!(#func).to_string();
175
176 let line_number = self.get_line_number(full_content, &func_code);
178
179 let metadata = self.analyze_metadata(&func_code, &func.sig);
181
182 let id = self.generate_id(&func_code);
184
185 Some(CodeComponent {
186 id,
187 component_type: ComponentType::Function,
188 content: func_code,
189 discovery_metadata: metadata,
190 origin: ComponentOrigin {
191 project_path: project_path.display().to_string(),
192 file_path: file_path.display().to_string(),
193 line_number,
194 contributor: self.contributor.clone(),
195 },
196 clearance: ClearanceLevel::WorldPublic,
197 })
198 }
199
200 fn extract_impl_methods(
202 &self,
203 impl_block: &ItemImpl,
204 file_path: &Path,
205 project_path: &Path,
206 full_content: &str,
207 ) -> Vec<CodeComponent> {
208 let mut components = Vec::new();
209
210 for item in &impl_block.items {
211 if let syn::ImplItem::Fn(method) = item {
212 let method_code = quote::quote!(#method).to_string();
213 let line_number = self.get_line_number(full_content, &method_code);
214 let metadata = self.analyze_metadata(&method_code, &method.sig);
215 let id = self.generate_id(&method_code);
216
217 components.push(CodeComponent {
218 id,
219 component_type: ComponentType::Function,
220 content: method_code,
221 discovery_metadata: metadata,
222 origin: ComponentOrigin {
223 project_path: project_path.display().to_string(),
224 file_path: file_path.display().to_string(),
225 line_number,
226 contributor: self.contributor.clone(),
227 },
228 clearance: ClearanceLevel::WorldPublic,
229 });
230 }
231 }
232
233 components
234 }
235
236 fn extract_module(
238 &self,
239 module: &ItemMod,
240 file_path: &Path,
241 project_path: &Path,
242 full_content: &str,
243 ) -> Option<CodeComponent> {
244 let module_code = quote::quote!(#module).to_string();
245 let line_number = self.get_line_number(full_content, &module_code);
246 let id = self.generate_id(&module_code);
247
248 let metadata = DiscoveryMetadata {
250 language: "rust".to_string(),
251 domains: vec![],
252 purposes: vec!["module".to_string()],
253 keywords: vec![module.ident.to_string()],
254 is_async: false,
255 };
256
257 Some(CodeComponent {
258 id,
259 component_type: ComponentType::Module,
260 content: module_code,
261 discovery_metadata: metadata,
262 origin: ComponentOrigin {
263 project_path: project_path.display().to_string(),
264 file_path: file_path.display().to_string(),
265 line_number,
266 contributor: self.contributor.clone(),
267 },
268 clearance: ClearanceLevel::WorldPublic,
269 })
270 }
271
272 fn analyze_metadata(&self, code: &str, sig: &syn::Signature) -> DiscoveryMetadata {
274 let mut domains = Vec::new();
275 let mut purposes = Vec::new();
276 let mut keywords = Vec::new();
277
278 let is_async = sig.asyncness.is_some();
280 if is_async {
281 keywords.push("async".to_string());
282 }
283
284 if code.contains("tokio::net") || code.contains("async_std::net") {
286 domains.push("networking".to_string());
287 }
288 if code.contains("sqlx") || code.contains("diesel") || code.contains("rusqlite") {
289 domains.push("database".to_string());
290 }
291 if code.contains("serde") || code.contains("serde_json") {
292 domains.push("serialization".to_string());
293 }
294 if code.contains("reqwest") || code.contains("hyper") {
295 domains.push("http".to_string());
296 }
297 if code.contains("tokio::fs") || code.contains("std::fs") {
298 domains.push("filesystem".to_string());
299 }
300
301 let func_name = sig.ident.to_string().to_lowercase();
303 if func_name.contains("parse") {
304 purposes.push("parsing".to_string());
305 }
306 if func_name.contains("validate") || func_name.contains("check") {
307 purposes.push("validation".to_string());
308 }
309 if func_name.contains("auth") || func_name.contains("login") {
310 purposes.push("authentication".to_string());
311 }
312 if func_name.contains("download") || func_name.contains("upload") {
313 purposes.push("transfer".to_string());
314 }
315 if func_name.contains("process") || func_name.contains("handle") {
316 purposes.push("processing".to_string());
317 }
318
319 keywords.push(sig.ident.to_string());
321
322 DiscoveryMetadata {
323 language: "rust".to_string(),
324 domains,
325 purposes,
326 keywords,
327 is_async,
328 }
329 }
330
331 fn get_line_number(&self, full_content: &str, code: &str) -> usize {
333 let first_line = code.lines().next().unwrap_or("");
335 if let Some(pos) = full_content.find(first_line) {
336 full_content[..pos].lines().count() + 1
337 } else {
338 1
339 }
340 }
341
342 fn generate_id(&self, content: &str) -> String {
344 use sha2::{Digest, Sha256};
345 let mut hasher = Sha256::new();
346 hasher.update(content.as_bytes());
347 let result = hasher.finalize();
348 hex::encode(result)
349 }
350
351 pub fn submit_component(&self, component: &CodeComponent) -> Result<()> {
353 let url = format!("{}/components/store", self.registry_url);
354
355 let response = self
356 .client
357 .post(&url)
358 .json(component)
359 .send()
360 .context("Failed to send component to registry")?;
361
362 if !response.status().is_success() {
363 anyhow::bail!(
364 "Registry returned error: {} - {}",
365 response.status(),
366 response.text().unwrap_or_default()
367 );
368 }
369
370 Ok(())
371 }
372
373 pub fn submit_batch(&self, components: &[CodeComponent]) -> Result<BatchResult> {
375 let start = Instant::now();
376 let mut success_count = 0;
377 let mut error_count = 0;
378 let mut errors = Vec::new();
379
380 for component in components {
381 match self.submit_component(component) {
382 Ok(_) => success_count += 1,
383 Err(e) => {
384 error_count += 1;
385 errors.push(format!("{}: {}", component.origin.file_path, e));
386 }
387 }
388 }
389
390 Ok(BatchResult {
391 total: components.len(),
392 success: success_count,
393 errors: error_count,
394 error_messages: errors,
395 duration: start.elapsed(),
396 })
397 }
398}
399
/// Outcome of submitting a batch of components to the registry.
#[derive(Debug)]
pub struct BatchResult {
    /// Number of components in the batch.
    pub total: usize,
    /// Number of submissions that succeeded.
    pub success: usize,
    /// Number of submissions that failed.
    pub errors: usize,
    /// One message per failed submission.
    pub error_messages: Vec<String>,
    /// Wall-clock time spent on the batch.
    pub duration: std::time::Duration,
}
409
410pub struct RegistryIndexer {
412 analyzer: MarineCodeAnalyzer,
413}
414
415impl RegistryIndexer {
416 pub fn new(registry_url: &str) -> Result<Self> {
417 Ok(Self {
418 analyzer: MarineCodeAnalyzer::new(registry_url)?,
419 })
420 }
421
422 pub fn index_project(&self, project_path: &Path) -> Result<IndexingStats> {
424 let start = Instant::now();
425 let mut all_components = Vec::new();
426 let mut files_processed = 0;
427 let mut files_skipped = 0;
428
429 let rust_files = self.find_rust_files(project_path)?;
431
432 for file_path in rust_files {
433 match self.analyzer.index_file(&file_path, project_path) {
434 Ok(components) => {
435 files_processed += 1;
436 all_components.extend(components);
437 }
438 Err(e) => {
439 eprintln!("Warning: Failed to index {}: {}", file_path.display(), e);
440 files_skipped += 1;
441 }
442 }
443 }
444
445 let batch_result = self.analyzer.submit_batch(&all_components)?;
447
448 Ok(IndexingStats {
449 project_path: project_path.to_path_buf(),
450 files_processed,
451 files_skipped,
452 functions_indexed: all_components.len(),
453 duration: start.elapsed(),
454 batch_result,
455 })
456 }
457
458 fn find_rust_files(&self, project_path: &Path) -> Result<Vec<PathBuf>> {
460 let mut files = Vec::new();
461
462 for entry in walkdir::WalkDir::new(project_path)
463 .follow_links(false)
464 .into_iter()
465 .filter_entry(|e| {
466 let name = e.file_name().to_str().unwrap_or("");
468 !matches!(name, "target" | "node_modules" | ".git" | "dist" | "build")
469 })
470 .filter_map(|e| e.ok())
471 {
472 if entry.file_type().is_file() {
473 if let Some(ext) = entry.path().extension() {
474 if ext == "rs" {
475 files.push(entry.path().to_path_buf());
476 }
477 }
478 }
479 }
480
481 Ok(files)
482 }
483}
484
485#[derive(Debug)]
487pub struct IndexingStats {
488 pub project_path: PathBuf,
489 pub files_processed: usize,
490 pub files_skipped: usize,
491 pub functions_indexed: usize,
492 pub duration: std::time::Duration,
493 pub batch_result: BatchResult,
494}
495
496impl IndexingStats {
497 pub fn print_summary(&self) {
499 println!("\n╔══════════════════════════════════════════════════════════╗");
500 println!("║ SmartPastCode Registry Indexing Summary ║");
501 println!("╚══════════════════════════════════════════════════════════╝");
502 println!();
503 println!("Project: {}", self.project_path.display());
504 println!();
505 println!("Files Processed: {}", self.files_processed);
506 println!("Files Skipped: {}", self.files_skipped);
507 println!("Functions Indexed: {}", self.functions_indexed);
508 println!();
509 println!("Registry Submission:");
510 println!(" Total: {}", self.batch_result.total);
511 println!(" Success: {}", self.batch_result.success);
512 println!(" Errors: {}", self.batch_result.errors);
513 println!();
514 println!("Performance:");
515 println!(" Total Duration: {:.2}s", self.duration.as_secs_f64());
516 println!(
517 " Indexing Speed: {:.1} functions/sec",
518 self.functions_indexed as f64 / self.duration.as_secs_f64()
519 );
520
521 if !self.batch_result.error_messages.is_empty() {
522 println!();
523 println!("Errors:");
524 for (i, error) in self.batch_result.error_messages.iter().take(5).enumerate() {
525 println!(" {}. {}", i + 1, error);
526 }
527 if self.batch_result.error_messages.len() > 5 {
528 println!(
529 " ... and {} more",
530 self.batch_result.error_messages.len() - 5
531 );
532 }
533 }
534
535 println!();
536 }
537}