//! leankg 0.4.2
//!
//! Lightweight Knowledge Graph for AI-Assisted Development.
pub mod extractor;
pub mod git;
pub mod parser;

pub use extractor::*;
pub use git::*;
pub use parser::*;

use crate::graph::GraphEngine;
use std::collections::HashSet;
use walkdir::WalkDir;

/// Recursively collects indexable source files under `root`.
///
/// Only files with a supported extension (`go`, `ts`, `js`, `py`, `rs`) are
/// returned; anything inside a `node_modules`, `vendor`, or `.git` directory
/// is skipped. Symlinks are followed; unreadable entries are silently ignored.
///
/// # Errors
/// Currently never returns `Err`; the `Result` is kept for interface stability.
pub fn find_files_sync(root: &str) -> Result<Vec<String>, Box<dyn std::error::Error>> {
    const EXTENSIONS: [&str; 5] = ["go", "ts", "js", "py", "rs"];
    const SKIP_DIRS: [&str; 3] = ["node_modules", "vendor", ".git"];

    let mut files = Vec::new();

    for entry in WalkDir::new(root)
        .follow_links(true)
        .into_iter()
        .filter_map(|e| e.ok())
    {
        let path = entry.path();
        if !path.is_file() {
            continue;
        }
        let wanted = path
            .extension()
            .and_then(|ext| ext.to_str())
            .is_some_and(|ext| EXTENSIONS.contains(&ext));
        if !wanted {
            continue;
        }
        // Skip vendored/VCS trees by exact path *component*, not raw substring:
        // the old `contains(".git")` check also excluded e.g. `.github/`, and
        // `contains("vendor")` excluded any file whose name merely contains it.
        let skipped = path.components().any(|c| {
            c.as_os_str()
                .to_str()
                .is_some_and(|name| SKIP_DIRS.contains(&name))
        });
        if !skipped {
            files.push(path.to_string_lossy().to_string());
        }
    }

    Ok(files)
}

/// Parses `file_path` and inserts the extracted entities and relationships
/// into `graph`, returning the number of elements indexed.
///
/// Files with an unsupported extension, no registered parser, or nothing to
/// extract yield `Ok(0)` rather than an error.
///
/// # Errors
/// Returns an error if the file cannot be read, the parse fails outright, or
/// the graph rejects an insert.
pub fn index_file_sync(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    file_path: &str,
) -> Result<usize, Box<dyn std::error::Error>> {
    let content = std::fs::read(file_path)?;
    let source = content.as_slice();

    // Map the file extension to a parser language key.
    // `.js` deliberately shares the TypeScript grammar.
    let language = if file_path.ends_with(".go") {
        "go"
    } else if file_path.ends_with(".ts") || file_path.ends_with(".js") {
        "typescript"
    } else if file_path.ends_with(".py") {
        "python"
    } else if file_path.ends_with(".rs") {
        "rust"
    } else {
        return Ok(0); // unsupported extension — nothing to index
    };

    // A missing parser for a supported language is not an error; just skip.
    let Some(parser) = parser_manager.get_parser_for_language(language) else {
        return Ok(0);
    };

    let tree = parser.parse(source, None).ok_or("Failed to parse")?;

    let extractor = EntityExtractor::new(source, file_path, language);
    let (elements, relationships) = extractor.extract(&tree);

    if elements.is_empty() && relationships.is_empty() {
        return Ok(0);
    }

    // Propagate insert failures instead of discarding them: the previous
    // `let _ = …` swallowed graph errors, leaving the index silently
    // incomplete while still reporting a positive element count.
    graph.insert_elements(&elements)?;
    graph.insert_relationships(&relationships)?;

    Ok(elements.len())
}

/// Re-indexes a single file: removes all graph data previously derived from
/// `file_path`, then parses and indexes it again via [`index_file_sync`].
///
/// Returns the number of elements indexed (0 if the file is unsupported).
///
/// # Errors
/// Propagates failures from the graph removals or from re-indexing.
pub fn reindex_file_sync(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    file_path: &str,
) -> Result<usize, Box<dyn std::error::Error>> {
    // Purge stale entries first so re-indexing never produces duplicates.
    graph.remove_elements_by_file(file_path)?;
    graph.remove_relationships_by_source(file_path)?;

    index_file_sync(graph, parser_manager, file_path)
}

/// Summary of one incremental indexing pass.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct IncrementalIndexResult {
    /// Paths (absolutized against the repo root) git reported as
    /// modified, added, deleted, or untracked.
    pub changed_files: Vec<String>,
    /// Paths of files found to depend on a changed file.
    pub dependent_files: Vec<String>,
    /// Number of files that produced at least one indexed element.
    pub total_files_processed: usize,
    /// Total elements indexed across all processed files.
    pub elements_indexed: usize,
}

pub async fn incremental_index_sync(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    root_path: &str,
) -> Result<IncrementalIndexResult, Box<dyn std::error::Error>> {
    if !GitAnalyzer::is_git_repo() {
        return Err("Not a git repository. Cannot perform incremental indexing.".into());
    }

    let repo_root = GitAnalyzer::get_repo_root().unwrap_or_else(|| root_path.to_string());

    let changed = GitAnalyzer::get_changed_files_since_last_commit()?;

    let deleted_files: Vec<String> = changed
        .deleted
        .iter()
        .map(|f| {
            if std::path::Path::new(f).is_absolute() {
                f.clone()
            } else {
                format!("{}/{}", repo_root, f)
            }
        })
        .collect();

    let mut all_changed: Vec<String> = Vec::new();
    all_changed.extend(changed.modified);
    all_changed.extend(changed.added);
    all_changed.extend(changed.deleted);

    let untracked = GitAnalyzer::get_untracked_files()?;
    let indexable_untracked = filter_indexable_files(&untracked);
    all_changed.extend(indexable_untracked);

    let changed_files: Vec<String> = all_changed
        .iter()
        .map(|f| {
            if std::path::Path::new(f).is_absolute() {
                f.clone()
            } else {
                format!("{}/{}", repo_root, f)
            }
        })
        .collect();

    for deleted_file in &deleted_files {
        graph.remove_elements_by_file(deleted_file)?;
        graph.remove_relationships_by_source(deleted_file)?;
    }

    let all_relationships = graph.all_relationships()?;
    let rel_tuples: Vec<(String, String)> = all_relationships
        .iter()
        .map(|r| (r.source_qualified.clone(), r.target_qualified.clone()))
        .collect();

    let mut dependent_files: Vec<String> = Vec::new();
    for changed_file in &changed_files {
        let file_name = std::path::Path::new(changed_file)
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or(changed_file);

        let deps = find_dependents(file_name, &rel_tuples);
        for dep in deps {
            let dep_path = std::path::Path::new(&dep);
            if !dep_path.is_absolute() {
                dependent_files.push(format!("{}/{}", repo_root, dep));
            } else {
                dependent_files.push(dep);
            }
        }
    }

    dependent_files.dedup();

    let mut all_files_to_process: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();

    for f in &changed_files {
        if !seen.contains(f) {
            all_files_to_process.push(f.clone());
            seen.insert(f.clone());
        }
    }
    for f in &dependent_files {
        if !seen.contains(f) {
            all_files_to_process.push(f.clone());
            seen.insert(f.clone());
        }
    }

    let mut total_elements = 0;
    let mut files_processed = 0;

    for file_path in &all_files_to_process {
        if std::path::Path::new(file_path).exists() {
            match reindex_file_sync(graph, parser_manager, file_path) {
                Ok(count) => {
                    if count > 0 {
                        total_elements += count;
                        files_processed += 1;
                    }
                }
                Err(e) => {
                    tracing::warn!("Failed to reindex {}: {}", file_path, e);
                }
            }
        }
    }

    Ok(IncrementalIndexResult {
        changed_files,
        dependent_files,
        total_files_processed: files_processed,
        elements_indexed: total_elements,
    })
}

/// Async wrapper around [`index_file_sync`]; see that function for details.
#[allow(dead_code)]
pub async fn index_file(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    file_path: &str,
) -> Result<usize, Box<dyn std::error::Error>> {
    // Forward the Result directly; `Ok(...?)` re-wrapped it for no reason.
    index_file_sync(graph, parser_manager, file_path)
}

/// Async wrapper around [`reindex_file_sync`]; see that function for details.
#[allow(dead_code)]
pub async fn reindex_file(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    file_path: &str,
) -> Result<usize, Box<dyn std::error::Error>> {
    // Forward the Result directly; `Ok(...?)` re-wrapped it for no reason.
    reindex_file_sync(graph, parser_manager, file_path)
}

/// Async entry point for incremental indexing; delegates to
/// [`incremental_index_sync`], which documents the behavior and errors.
#[allow(dead_code)]
pub async fn incremental_index(
    graph: &GraphEngine,
    parser_manager: &mut ParserManager,
    root_path: &str,
) -> Result<IncrementalIndexResult, Box<dyn std::error::Error>> {
    incremental_index_sync(graph, parser_manager, root_path).await
}