use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::PathBuf;
/// Broad classification of a file, with category-specific detail fields.
///
/// Produced by `utils::categorize_file` from the file extension and,
/// optionally, the file's content.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum FileCategory {
    /// Source code in a recognized programming language.
    Code {
        language: String,
        /// Frameworks/libraries detected in the content; may be empty.
        frameworks: Vec<String>,
    },
    /// Human-readable documentation (e.g. markdown).
    Documentation {
        format: String,
        /// Rough complexity bucket: "simple", "detailed", or "technical".
        complexity: String,
    },
    /// Configuration files (json, yaml, toml, ...).
    Configuration {
        config_type: String,
        purpose: String,
    },
    /// Structured data files (e.g. csv).
    Data {
        format: String,
        structure: String,
    },
    /// Images, video, or audio.
    Media {
        media_type: String,
        format: String,
    },
    /// Archives and compressed files.
    Archive {
        archive_type: String,
        compression: String,
    },
    /// Shell or otherwise interpreted scripts.
    Script {
        interpreter: String,
        purpose: String,
    },
    /// Anything not matched by the categories above.
    Other {
        mime_type: String,
        description: String,
    },
}
/// Semantic signals extracted from a single file's content.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SemanticMetadata {
    /// High-level concepts detected in the content.
    pub concepts: Vec<String>,
    /// Named entities found in the file, with confidence and location.
    pub entities: Vec<FileEntity>,
    /// Salient keywords.
    pub keywords: Vec<String>,
    /// Relevance score, when computed — presumably relative to a query; confirm with producer.
    pub relevance_score: Option<f64>,
    /// Content fingerprint — likely produced by `utils::generate_semantic_fingerprint`.
    pub semantic_fingerprint: Option<String>,
    /// Short textual summary, when available.
    pub summary: Option<String>,
    /// Estimated reading time — likely from `utils::estimate_reading_time`.
    pub reading_time_minutes: Option<u32>,
    /// Heuristic complexity — likely the 0.0–1.0 value from `utils::calculate_complexity_score`.
    pub complexity_score: Option<f64>,
}
/// A named entity detected within a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileEntity {
    /// Surface text of the entity as it appears in the file.
    pub text: String,
    /// Entity kind (free-form string, not an enum).
    pub entity_type: String,
    /// Detection confidence — presumably in 0.0–1.0; confirm with the producer.
    pub confidence: f64,
    /// Location of the entity in the file, if known.
    pub position: Option<FilePosition>,
}
/// A span within a file, expressed as a line plus character offsets.
///
/// NOTE(review): whether these are 0- or 1-based, and whether the end offset
/// is inclusive, is not established anywhere in this file — confirm with the
/// code that fills these in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FilePosition {
    pub line_number: usize,
    pub character_start: usize,
    pub character_end: usize,
}
/// All discovered relationships between one file and the rest of the corpus.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileRelationships {
    /// Files ranked similar by content/semantics (see [`SimilarFile`]).
    pub similar_files: Vec<SimilarFile>,
    /// Files related for a stated reason (see [`RelatedFile`]).
    pub related_files: Vec<RelatedFile>,
    /// Files that reference this one.
    pub referenced_by: Vec<FileReference>,
    /// Files this one references.
    pub references: Vec<FileReference>,
}
/// One entry in a file's similarity list.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarFile {
    pub file_path: PathBuf,
    /// Similarity score — presumably 0.0–1.0 (cf. `similarity_threshold` default of 0.7).
    pub similarity_score: f64,
    /// Which axis the similarity was measured on.
    pub similarity_type: SimilarityType,
    /// Concepts the two files have in common.
    pub shared_concepts: Vec<String>,
}
/// Axis along which two files were judged similar.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SimilarityType {
    /// Raw content overlap.
    Content,
    /// Meaning-level similarity.
    Semantic,
    /// Similar structure/layout.
    Structural,
    /// Same subject matter.
    Topic,
}
/// A file related to another, with the reason spelled out.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RelatedFile {
    pub file_path: PathBuf,
    /// Why the files are related.
    pub relationship_type: RelationshipType,
    /// Confidence in the relationship — presumably 0.0–1.0; confirm with producer.
    pub confidence: f64,
    /// Human-readable justification for the relationship.
    pub explanation: String,
}
/// Kind of relationship between two files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum RelationshipType {
    /// One file depends on the other (e.g. imports it).
    Dependency,
    /// Shared theme or topic.
    Thematic,
    /// Related in time (e.g. modified together).
    Temporal,
    /// Structurally connected.
    Structural,
    /// Serve related functions.
    Functional,
}
/// A concrete reference from one file to another.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileReference {
    /// The file on the other end of the reference.
    pub file_path: PathBuf,
    /// How the reference is expressed (import, link, ...).
    pub reference_type: ReferenceType,
    /// Surrounding text where the reference occurs.
    pub context: String,
    /// Line of the reference, when known.
    pub line_number: Option<usize>,
}
/// How a cross-file reference is expressed in the referring file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ReferenceType {
    /// Language-level import/include/use.
    Import,
    /// Hyperlink (e.g. in markdown).
    Link,
    /// Literal file-system path.
    Path,
    /// Mentioned in documentation text.
    Documentation,
    /// Referenced from a configuration file.
    Configuration,
}
/// One hit returned from a file search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchResult {
    pub file_path: PathBuf,
    /// Base name of the file (redundant with `file_path`, kept for convenience).
    pub file_name: String,
    /// Category of the matched file.
    pub category: FileCategory,
    /// Ranking score for this result — presumably higher is more relevant.
    pub relevance_score: f64,
    /// How the query matched (exact, semantic, ...).
    pub match_type: MatchType,
    /// Individual in-file matches with context.
    pub matches: Vec<FileMatch>,
    /// Semantic metadata for the matched file.
    pub metadata: SemanticMetadata,
    /// File size in bytes.
    pub file_size: u64,
    /// Last-modified timestamp — stored as a string; format is not
    /// established in this file, confirm with producer.
    pub last_modified: String,
}
/// A single matched line inside a search result, with surrounding context.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileMatch {
    pub line_number: usize,
    /// The text that matched the query.
    pub matched_text: String,
    /// Lines immediately preceding the match.
    pub context_before: Vec<String>,
    /// Lines immediately following the match.
    pub context_after: Vec<String>,
    /// Match confidence — presumably 0.0–1.0; confirm with producer.
    pub confidence: f64,
}
/// How a search query matched a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum MatchType {
    /// Literal, exact match.
    Exact,
    /// Meaning-level match.
    Semantic,
    /// Part of the query matched.
    Partial,
    /// Approximate/fuzzy match.
    Fuzzy,
}
/// Snapshot of the file index's state.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileIndexStatus {
    /// Total files known to the index.
    pub total_files: u64,
    /// Files currently being processed.
    pub processing_files: u64,
    /// Files that failed to index.
    pub failed_files: u64,
    /// On-disk size of the index in bytes.
    pub index_size_bytes: u64,
    /// Timestamp of the last index update — string format not established
    /// in this file; confirm with producer.
    pub last_update: String,
    /// Current lifecycle state of the indexer.
    pub status: IndexingStatus,
}
/// Lifecycle state of the indexer.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum IndexingStatus {
    /// Not currently indexing.
    Idle,
    /// Indexing in progress. `progress` is presumably a 0.0–1.0 fraction —
    /// confirm with the producer.
    Indexing { progress: f64, current_file: String },
    /// The last run finished successfully.
    Completed,
    /// The last run aborted; `error` describes why.
    Failed { error: String },
    /// Indexing is suspended.
    Paused,
}
/// Settings shared by the file analysis/search/tagging operations.
///
/// See the [`Default`] impl for the concrete default values.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileOperationConfig {
    /// Directories to scan; empty by default.
    pub include_directories: Vec<PathBuf>,
    /// Directories to skip (defaults include `.git`, `target`, `node_modules`).
    pub exclude_directories: Vec<PathBuf>,
    /// Glob-style file patterns to skip (defaults: `*.tmp`, `*.log`, `*.cache`).
    pub exclude_patterns: Vec<String>,
    /// Maximum file size to process, in megabytes (default 100).
    pub max_file_size_mb: u64,
    /// Whether to run semantic analysis (default true).
    pub enable_semantic_analysis: bool,
    /// Minimum similarity score to report (default 0.7).
    pub similarity_threshold: f64,
    /// Cap on the number of similar files reported (default 10).
    pub max_similar_files: usize,
    /// Cache location; `None` disables a custom cache directory.
    pub cache_directory: Option<PathBuf>,
}
impl Default for FileOperationConfig {
    /// Conservative defaults: skip VCS/build/IDE directories and transient
    /// file patterns, cap files at 100 MB, and enable semantic analysis
    /// with a 0.7 similarity threshold.
    fn default() -> Self {
        let exclude_directories = [".git", "target", "node_modules", ".vscode", ".idea"]
            .iter()
            .copied()
            .map(PathBuf::from)
            .collect();
        let exclude_patterns = ["*.tmp", "*.log", "*.cache"]
            .iter()
            .map(|p| p.to_string())
            .collect();
        Self {
            include_directories: Vec::new(),
            exclude_directories,
            exclude_patterns,
            max_file_size_mb: 100,
            enable_semantic_analysis: true,
            similarity_threshold: 0.7,
            max_similar_files: 10,
            cache_directory: Some(PathBuf::from(".terraphim_file_cache")),
        }
    }
}
/// Request to analyze a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAnalysisRequest {
    /// File to analyze.
    pub file_path: PathBuf,
    /// Which analyses to run.
    pub analysis_types: Vec<AnalysisType>,
    /// Operation settings to use for this request.
    pub config: FileOperationConfig,
}
/// An individual analysis that can be requested for a file.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum AnalysisType {
    /// Categorize the file (see `FileCategory`).
    Classification,
    /// Extract semantic metadata (concepts, keywords, ...).
    SemanticExtraction,
    /// Find similar files.
    SimilarityAnalysis,
    /// Discover relationships to other files.
    RelationshipAnalysis,
    /// Produce a textual summary.
    Summarization,
    /// Extract named entities.
    EntityExtraction,
}
/// Response to a [`FileAnalysisRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAnalysisResponse {
    /// File that was analyzed.
    pub file_path: PathBuf,
    /// Per-analysis results; absent sections were not requested or failed.
    pub results: FileAnalysisResults,
    /// Wall-clock processing time in milliseconds.
    pub processing_time_ms: u64,
    /// Non-fatal issues encountered during analysis.
    pub warnings: Vec<String>,
}
/// Results of the individual analyses; each is `None` when not produced.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileAnalysisResults {
    /// Classification result, if requested.
    pub category: Option<FileCategory>,
    /// Semantic extraction result, if requested.
    pub semantic_metadata: Option<SemanticMetadata>,
    /// Relationship analysis result, if requested.
    pub relationships: Option<FileRelationships>,
    /// Summarization result, if requested.
    pub summary: Option<String>,
    /// Errors encountered for individual analyses.
    pub errors: Vec<String>,
}
/// Request to search files for a query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchRequest {
    /// Search query text.
    pub query: String,
    /// Root to search under; `None` presumably means a default root — confirm with handler.
    pub search_path: Option<PathBuf>,
    /// Restrict to these file types (extension strings), when set.
    pub file_types: Option<Vec<String>>,
    /// Whether to use semantic (meaning-level) matching.
    pub semantic_search: bool,
    /// Cap on results; `None` presumably means unlimited or a server default.
    pub max_results: Option<usize>,
    /// Operation settings to use for this request.
    pub config: FileOperationConfig,
}
/// Response to a [`FileSearchRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileSearchResponse {
    /// Ranked search hits.
    pub results: Vec<FileSearchResult>,
    /// Total matches found (may exceed `results.len()` when capped).
    pub total_matches: usize,
    /// Wall-clock search time in milliseconds.
    pub search_time_ms: u64,
    /// Query after expansion (e.g. synonyms), when expansion occurred.
    pub expanded_query: Option<String>,
}
/// Request to tag a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileTaggingRequest {
    /// File to tag.
    pub file_path: PathBuf,
    /// Tags to apply.
    pub tags: Vec<String>,
    /// Whether to also auto-suggest additional tags.
    pub auto_suggest: bool,
    /// Operation settings to use for this request.
    pub config: FileOperationConfig,
}
/// Response to a [`FileTaggingRequest`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileTaggingResponse {
    /// File that was tagged.
    pub file_path: PathBuf,
    /// Tags actually applied.
    pub applied_tags: Vec<String>,
    /// Additional tags suggested by the system.
    pub suggested_tags: Vec<String>,
    /// Per-tag confidence — presumably covers suggested and/or applied tags;
    /// confirm with the producer.
    pub tag_confidence: HashMap<String, f64>,
}
pub mod utils {
    //! Heuristic helpers for classifying files and extracting lightweight
    //! semantic signals from their content. Everything here is driven by
    //! extensions and substring checks; no real language parsing happens.
    use super::*;

    /// Classify a file into a [`FileCategory`] from its extension, its file
    /// name, and (optionally) its content.
    ///
    /// The extension alone selects the category; `content` only refines the
    /// detail fields (framework detection, documentation complexity, config
    /// purpose). Unknown extensions fall back to [`FileCategory::Other`].
    pub fn categorize_file(file_path: &std::path::Path, content: Option<&str>) -> FileCategory {
        let extension = file_path
            .extension()
            .and_then(|ext| ext.to_str())
            .unwrap_or("");
        let file_name = file_path
            .file_name()
            .and_then(|name| name.to_str())
            .unwrap_or("");
        let body = content.unwrap_or("");
        match extension {
            "rs" => FileCategory::Code {
                language: "Rust".to_string(),
                frameworks: detect_rust_frameworks(body),
            },
            // Fix: .ts/.tsx were previously mislabeled as "JavaScript".
            "ts" | "tsx" => FileCategory::Code {
                language: "TypeScript".to_string(),
                frameworks: detect_js_frameworks(body),
            },
            "js" | "jsx" => FileCategory::Code {
                language: "JavaScript".to_string(),
                frameworks: detect_js_frameworks(body),
            },
            "py" => FileCategory::Code {
                language: "Python".to_string(),
                frameworks: detect_python_frameworks(body),
            },
            "md" | "markdown" => FileCategory::Documentation {
                format: "markdown".to_string(),
                complexity: estimate_documentation_complexity(body),
            },
            "json" => FileCategory::Configuration {
                config_type: "json".to_string(),
                purpose: infer_json_purpose(file_name, body),
            },
            "yaml" | "yml" => FileCategory::Configuration {
                config_type: "yaml".to_string(),
                purpose: infer_yaml_purpose(file_name, body),
            },
            "toml" => FileCategory::Configuration {
                config_type: "toml".to_string(),
                purpose: infer_toml_purpose(file_name, body),
            },
            "csv" => FileCategory::Data {
                format: "csv".to_string(),
                structure: "structured".to_string(),
            },
            "png" | "jpg" | "jpeg" | "gif" | "svg" | "webp" => FileCategory::Media {
                media_type: "image".to_string(),
                format: extension.to_string(),
            },
            "mp4" | "avi" | "mkv" | "mov" => FileCategory::Media {
                media_type: "video".to_string(),
                format: extension.to_string(),
            },
            "mp3" | "wav" | "flac" | "ogg" => FileCategory::Media {
                media_type: "audio".to_string(),
                format: extension.to_string(),
            },
            "zip" | "tar" | "gz" | "bz2" | "xz" => FileCategory::Archive {
                archive_type: detect_archive_type(extension),
                compression: extension.to_string(),
            },
            // Fix: the interpreter now mirrors the actual extension instead
            // of reporting "bash" for sh/zsh/fish scripts.
            "sh" | "bash" | "zsh" | "fish" => FileCategory::Script {
                interpreter: extension.to_string(),
                purpose: infer_script_purpose(file_name, body),
            },
            _ => FileCategory::Other {
                mime_type: infer_mime_type(extension),
                description: format!("File with extension: {}", extension),
            },
        }
    }

    /// Substring scan for well-known Rust crates mentioned anywhere in the
    /// source; imports, comments, and strings all count as hits.
    fn detect_rust_frameworks(content: &str) -> Vec<String> {
        const MARKERS: [&str; 6] = ["tokio", "serde", "actix", "rocket", "clap", "tracing"];
        MARKERS
            .iter()
            .copied()
            .filter(|marker| content.contains(*marker))
            .map(str::to_string)
            .collect()
    }

    /// Substring scan for well-known JS/TS ecosystem names. Each entry is
    /// (canonical name, accepted spellings); single-case markers simply
    /// repeat the same spelling twice.
    fn detect_js_frameworks(content: &str) -> Vec<String> {
        const MARKERS: [(&str, [&str; 2]); 7] = [
            ("react", ["react", "React"]),
            ("vue", ["vue", "Vue"]),
            ("angular", ["angular", "Angular"]),
            ("express", ["express", "express"]),
            ("node", ["node", "Node"]),
            ("webpack", ["webpack", "webpack"]),
            ("typescript", ["typescript", "TypeScript"]),
        ];
        MARKERS
            .iter()
            .filter(|(_, spellings)| spellings.iter().any(|s| content.contains(*s)))
            .map(|(name, _)| name.to_string())
            .collect()
    }

    /// Substring scan for well-known Python frameworks/libraries; same
    /// (canonical name, accepted spellings) scheme as the JS detector.
    fn detect_python_frameworks(content: &str) -> Vec<String> {
        const MARKERS: [(&str, [&str; 2]); 7] = [
            ("django", ["django", "Django"]),
            ("flask", ["flask", "Flask"]),
            ("fastapi", ["fastapi", "FastAPI"]),
            ("pytest", ["pytest", "pytest"]),
            ("pandas", ["pandas", "pandas"]),
            ("numpy", ["numpy", "numpy"]),
            ("requests", ["requests", "requests"]),
        ];
        MARKERS
            .iter()
            .filter(|(_, spellings)| spellings.iter().any(|s| content.contains(*s)))
            .map(|(name, _)| name.to_string())
            .collect()
    }

    /// Bucket documentation into "simple" / "detailed" / "technical" based
    /// on word count and fenced code-block count (pairs of ``` markers).
    fn estimate_documentation_complexity(content: &str) -> String {
        let word_count = content.split_whitespace().count();
        let code_block_count = content.matches("```").count() / 2;
        if word_count > 2000 || code_block_count > 10 {
            "technical".to_string()
        } else if word_count > 500 || code_block_count > 3 {
            "detailed".to_string()
        } else {
            "simple".to_string()
        }
    }

    /// Guess the purpose of a JSON file from its name. Content is currently
    /// unused. Checked in priority order; "tsconfig" is subsumed by the
    /// plain "config" substring check.
    fn infer_json_purpose(file_name: &str, _content: &str) -> String {
        if file_name.contains("package") {
            "package".to_string()
        } else if file_name.contains("config") {
            "build".to_string()
        } else if file_name.contains("settings") {
            "application".to_string()
        } else if file_name.contains("manifest") {
            "project".to_string()
        } else {
            "configuration".to_string()
        }
    }

    /// Guess the purpose of a YAML file from its name. Content is currently
    /// unused.
    ///
    /// NOTE(review): `file_name` here is only the base name, so the
    /// ".github" check will rarely (if ever) match — CI files live in a
    /// `.github/workflows/` *directory*. Consider checking the full path.
    fn infer_yaml_purpose(file_name: &str, _content: &str) -> String {
        if file_name.contains("docker-compose") {
            "deployment".to_string()
        } else if file_name.contains("github-actions") || file_name.contains(".github") {
            "ci-cd".to_string()
        } else if file_name.contains("k8s") || file_name.contains("kubernetes") {
            "orchestration".to_string()
        } else {
            "configuration".to_string()
        }
    }

    /// Guess the purpose of a TOML file from its name. Content is currently
    /// unused.
    fn infer_toml_purpose(file_name: &str, _content: &str) -> String {
        if file_name.contains("Cargo") {
            "build".to_string()
        } else if file_name.contains("pyproject") {
            "project".to_string()
        } else if file_name.contains("toolchain") {
            "development".to_string()
        } else {
            "configuration".to_string()
        }
    }

    /// Map an archive extension to a coarse archive-type label.
    fn detect_archive_type(extension: &str) -> String {
        match extension {
            "zip" => "zip".to_string(),
            "tar" => "tar".to_string(),
            "gz" | "bz2" | "xz" => "compressed".to_string(),
            _ => "archive".to_string(),
        }
    }

    /// Guess a script's purpose from substrings in its name and content,
    /// checked in priority order (build > deployment > testing > setup).
    fn infer_script_purpose(file_name: &str, content: &str) -> String {
        if file_name.contains("build")
            || content.contains("cargo build")
            || content.contains("npm build")
        {
            "build".to_string()
        } else if file_name.contains("deploy") || content.contains("deploy") {
            "deployment".to_string()
        } else if file_name.contains("test")
            || content.contains("pytest")
            || content.contains("jest")
        {
            "testing".to_string()
        } else if file_name.contains("setup") || content.contains("install") {
            "setup".to_string()
        } else {
            "utility".to_string()
        }
    }

    /// Best-effort MIME type for extensions not matched elsewhere; unknown
    /// extensions get a synthetic "application/x-<ext>" type.
    fn infer_mime_type(extension: &str) -> String {
        match extension {
            "txt" => "text/plain".to_string(),
            "bin" => "application/octet-stream".to_string(),
            "exe" => "application/x-executable".to_string(),
            "dll" => "application/x-msdownload".to_string(),
            _ => format!("application/x-{}", extension),
        }
    }

    /// Estimate reading time in whole minutes, assuming ~220 words per
    /// minute. Always returns at least 1.
    pub fn estimate_reading_time(content: &str) -> u32 {
        let word_count = content.split_whitespace().count() as u32;
        (word_count / 220).max(1)
    }

    /// Rough 0.0–1.0 complexity score combining line count (weight ≤0.3),
    /// vocabulary diversity (≤0.4), and brace/paren density (≤0.3).
    pub fn calculate_complexity_score(content: &str) -> f64 {
        let total_words = content.split_whitespace().count();
        // Fix: empty/whitespace-only input previously produced NaN via the
        // 0/0 division in the word-diversity term.
        if total_words == 0 {
            return 0.0;
        }
        let mut score = 0.0;
        let line_count = content.lines().count() as f64;
        score += (line_count / 1000.0).min(0.3);
        let unique_words: std::collections::HashSet<&str> =
            content.split_whitespace().collect();
        score += (unique_words.len() as f64 / total_words as f64) * 0.4;
        let code_chars = content
            .matches(|c| c == '{' || c == '}' || c == '(' || c == ')')
            .count() as f64;
        score += (code_chars / 100.0).min(0.3);
        score.min(1.0)
    }

    /// Content fingerprint: hash of the 50 most frequent (lowercased) words
    /// among the first 1000 words of `content`.
    ///
    /// NOTE(review): `DefaultHasher::new()` is deterministic within one Rust
    /// release but its algorithm is not guaranteed stable across releases —
    /// do not persist these fingerprints across toolchain upgrades.
    pub fn generate_semantic_fingerprint(content: &str) -> String {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};
        let mut word_counts: HashMap<String, u32> = HashMap::new();
        for word in content.split_whitespace().take(1000) {
            *word_counts.entry(word.to_lowercase()).or_insert(0) += 1;
        }
        let mut top_words: Vec<_> = word_counts.into_iter().collect();
        // Fix: break count ties alphabetically. HashMap iteration order is
        // randomized, so the previous count-only sort made the fingerprint
        // nondeterministic across runs whenever the top 50 contained ties.
        top_words.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
        let mut hasher = DefaultHasher::new();
        for (word, count) in top_words.into_iter().take(50) {
            word.hash(&mut hasher);
            count.hash(&mut hasher);
        }
        format!("{:x}", hasher.finish())
    }

    /// Scan for a fixed vocabulary of technical terms (case-insensitive)
    /// and return the sorted, deduplicated hits.
    pub fn extract_concepts(content: &str) -> Vec<String> {
        const TECHNICAL_TERMS: [&str; 23] = [
            "api", "database", "algorithm", "function", "class", "interface",
            "service", "client", "server", "protocol", "authentication",
            "authorization", "encryption", "security", "performance",
            "scalability", "architecture", "design", "pattern", "framework",
            "library", "dependency", "module",
        ];
        // Fix: lowercase the content once instead of re-allocating a
        // lowercased copy for every term inside the loop.
        let haystack = content.to_lowercase();
        let mut concepts: Vec<String> = TECHNICAL_TERMS
            .iter()
            .copied()
            .filter(|term| haystack.contains(*term))
            .map(str::to_string)
            .collect();
        concepts.sort();
        concepts.dedup();
        concepts
    }

    /// Validate that `path` names an existing regular file.
    ///
    /// # Errors
    /// Fails when the path does not exist or is not a file.
    pub fn validate_file_path(path: &str) -> Result<PathBuf> {
        let path_buf = PathBuf::from(path);
        if !path_buf.exists() {
            anyhow::bail!("File does not exist: {}", path);
        }
        if !path_buf.is_file() {
            anyhow::bail!("Path is not a file: {}", path);
        }
        Ok(path_buf)
    }

    /// Validate that `path` names an existing directory.
    ///
    /// # Errors
    /// Fails when the path does not exist or is not a directory.
    pub fn validate_directory_path(path: &str) -> Result<PathBuf> {
        let path_buf = PathBuf::from(path);
        if !path_buf.exists() {
            anyhow::bail!("Directory does not exist: {}", path);
        }
        if !path_buf.is_dir() {
            anyhow::bail!("Path is not a directory: {}", path);
        }
        Ok(path_buf)
    }
}
/// Fluent builder pairing a [`FileOperationConfig`] with a set of
/// [`AnalysisType`]s to run.
pub struct FileOperationBuilder {
    // Analyses accumulated via `with_analysis_types` / the `add_*` methods.
    analysis_types: Vec<AnalysisType>,
    // Operation settings; starts from `FileOperationConfig::default()`.
    config: FileOperationConfig,
}
impl FileOperationBuilder {
    /// Create a builder pre-loaded with classification and semantic
    /// extraction, using the default [`FileOperationConfig`].
    pub fn new() -> Self {
        Self {
            analysis_types: vec![
                AnalysisType::Classification,
                AnalysisType::SemanticExtraction,
            ],
            config: FileOperationConfig::default(),
        }
    }

    /// Replace the accumulated analysis types wholesale.
    pub fn with_analysis_types(mut self, types: Vec<AnalysisType>) -> Self {
        self.analysis_types = types;
        self
    }

    /// Replace the operation configuration.
    pub fn with_config(mut self, config: FileOperationConfig) -> Self {
        self.config = config;
        self
    }

    /// Append `analysis` unless it is already present, keeping the list
    /// duplicate-free. (Shared body for the six `add_*` methods, which
    /// previously repeated this logic verbatim.)
    fn push_unique(&mut self, analysis: AnalysisType) {
        if !self.analysis_types.contains(&analysis) {
            self.analysis_types.push(analysis);
        }
    }

    /// Request file classification.
    pub fn add_classification(mut self) -> Self {
        self.push_unique(AnalysisType::Classification);
        self
    }

    /// Request semantic metadata extraction.
    pub fn add_semantic_extraction(mut self) -> Self {
        self.push_unique(AnalysisType::SemanticExtraction);
        self
    }

    /// Request similarity analysis.
    pub fn add_similarity_analysis(mut self) -> Self {
        self.push_unique(AnalysisType::SimilarityAnalysis);
        self
    }

    /// Request relationship analysis.
    pub fn add_relationship_analysis(mut self) -> Self {
        self.push_unique(AnalysisType::RelationshipAnalysis);
        self
    }

    /// Request summarization.
    pub fn add_summarization(mut self) -> Self {
        self.push_unique(AnalysisType::Summarization);
        self
    }

    /// Request entity extraction.
    pub fn add_entity_extraction(mut self) -> Self {
        self.push_unique(AnalysisType::EntityExtraction);
        self
    }

    /// Read access to the analysis types accumulated so far.
    ///
    /// Added because `build` does not carry them (see below); callers who
    /// need the list can read it from here before building.
    pub fn analysis_types(&self) -> &[AnalysisType] {
        &self.analysis_types
    }

    /// Finish building and return the configuration.
    ///
    /// NOTE(review): this returns only the config — the accumulated
    /// `analysis_types` are dropped here, so the `add_*` methods have no
    /// effect on the built value. The return type is kept for
    /// backward compatibility; use [`Self::analysis_types`] to retrieve the
    /// list, or consider a future `build_request(file_path)` that returns a
    /// `FileAnalysisRequest` carrying both.
    pub fn build(self) -> FileOperationConfig {
        self.config
    }
}
impl Default for FileOperationBuilder {
    /// Equivalent to [`FileOperationBuilder::new`].
    fn default() -> Self {
        Self::new()
    }
}