concept-analyzer 0.1.1

A unified pipeline that analyzes code repositories and extracts first-principles instructions for AI agents
Documentation
//! File collection utilities

use anyhow::Result;
use serde::{Deserialize, Serialize};
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

/// A single file gathered during a repository walk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileData {
    /// Path of the file as reported by the directory walk.
    pub path: PathBuf,
    /// Entire contents of the file, read as a string.
    pub content: String,
}

/// One batch of files collected beneath a common root path.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileCollection {
    /// Root path the walk that produced this batch started from.
    pub root_path: PathBuf,
    /// Files that belong to this batch.
    pub files: Vec<FileData>,
}

/// Walks a directory tree and groups the files it can read into batches.
///
/// The collector only holds configuration; it is set up through the `with_*`
/// builder methods and can be cloned or debug-printed freely.
#[derive(Debug, Clone)]
pub struct BatchFileCollector {
    /// Number of files after which the batch being filled is closed.
    max_files_per_batch: usize,
    /// Glob patterns; a file whose path matches any of them is skipped.
    exclude_patterns: Vec<String>,
}

impl Default for BatchFileCollector {
    /// Builds the baseline configuration: batches of 100 files and no
    /// exclude patterns.
    fn default() -> Self {
        let max_files_per_batch = 100;
        let exclude_patterns = Vec::new();

        Self {
            max_files_per_batch,
            exclude_patterns,
        }
    }
}

impl BatchFileCollector {
    /// Creates a collector with the default configuration.
    ///
    /// Equivalent to [`BatchFileCollector::default`].
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sets the number of files after which a batch is closed.
    ///
    /// A batch is closed as soon as it holds at least `max` files, so a value
    /// of `0` behaves like `1`: every file ends up in its own batch.
    #[must_use]
    pub fn with_max_files_per_batch(mut self, max: usize) -> Self {
        self.max_files_per_batch = max;
        self
    }

    /// Replaces the list of glob patterns used to exclude files.
    ///
    /// Patterns that are not valid globs are ignored during collection.
    #[must_use]
    pub fn with_exclude_patterns(mut self, patterns: Vec<String>) -> Self {
        self.exclude_patterns = patterns;
        self
    }

    /// Walks `repo_path` recursively and returns its files grouped in batches.
    ///
    /// Every regular file found by the walk is read as a string and appended
    /// to the current batch; a batch is emitted once it reaches the configured
    /// size, and a final, possibly smaller batch holds the remainder. Each
    /// returned [`FileCollection`] has `root_path` set to `repo_path`.
    ///
    /// A file is skipped when
    /// * any exclude pattern matches its path, either as yielded by the walk
    ///   or relative to `repo_path`, or
    /// * it cannot be read into a `String` (collection is best-effort, so such
    ///   read failures are deliberately not reported).
    ///
    /// # Errors
    ///
    /// Returns an error if the directory walk itself fails on an entry.
    pub async fn collect_repository(&self, repo_path: &Path) -> Result<Vec<FileCollection>> {
        // Compile every glob once up front instead of once per visited file.
        // Invalid patterns are dropped, which keeps them from ever matching.
        let compiled_patterns: Vec<glob::Pattern> = self
            .exclude_patterns
            .iter()
            .filter_map(|pattern| glob::Pattern::new(pattern).ok())
            .collect();

        let mut collections = Vec::new();
        let mut current_batch = Vec::new();

        for entry in WalkDir::new(repo_path) {
            let entry = entry?;
            if !entry.file_type().is_file() {
                continue;
            }

            let path = entry.path();
            // Walked paths carry the `repo_path` prefix, so a repo-relative
            // pattern such as `target/**` can only match the stripped form.
            let relative_path = path.strip_prefix(repo_path).unwrap_or(path);
            let is_excluded = compiled_patterns.iter().any(|pattern| {
                pattern.matches_path(path) || pattern.matches_path(relative_path)
            });
            if is_excluded {
                continue;
            }

            // Best-effort: files that cannot be read as text are skipped.
            let content = match tokio::fs::read_to_string(path).await {
                Ok(content) => content,
                Err(_) => continue,
            };

            current_batch.push(FileData {
                path: path.to_path_buf(),
                content,
            });

            if current_batch.len() >= self.max_files_per_batch {
                collections.push(FileCollection {
                    root_path: repo_path.to_path_buf(),
                    // `take` hands the full batch over and leaves an empty Vec.
                    files: std::mem::take(&mut current_batch),
                });
            }
        }

        // Emit whatever is left over as a final, partially filled batch.
        if !current_batch.is_empty() {
            collections.push(FileCollection {
                root_path: repo_path.to_path_buf(),
                files: current_batch,
            });
        }

        Ok(collections)
    }
}