greppy/parse/
walker.rs

1use crate::core::config::MAX_FILE_SIZE;
2use crate::core::error::Result;
3use ignore::WalkBuilder;
4use std::path::{Path, PathBuf};
5
/// A single file selected for indexing, together with its text.
///
/// Derives `Debug`, `Clone`, `PartialEq`, and `Eq` so values can be logged,
/// duplicated, and compared in tests without hand-written impls.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FileInfo {
    /// Path of the file as yielded by the directory walk.
    pub path: PathBuf,
    /// Entire file contents, decoded as UTF-8.
    pub content: String,
}
11
12/// Walk a project directory, respecting .gitignore
13pub fn walk_project(root: &Path) -> Result<Vec<FileInfo>> {
14    let mut files = Vec::new();
15
16    let walker = WalkBuilder::new(root)
17        .hidden(true)
18        .git_ignore(true)
19        .git_global(true)
20        .git_exclude(true)
21        .max_filesize(Some(MAX_FILE_SIZE))
22        .build();
23
24    for entry in walker.flatten() {
25        let path = entry.path();
26
27        if !path.is_file() {
28            continue;
29        }
30
31        if !is_code_file(path) {
32            continue;
33        }
34
35        if let Ok(content) = std::fs::read_to_string(path) {
36            files.push(FileInfo {
37                path: path.to_path_buf(),
38                content,
39            });
40        }
41    }
42
43    Ok(files)
44}
45
/// Report whether `path` has an extension on the indexing allow-list.
///
/// The extension is lower-cased before comparison, so `Foo.RS` and `foo.rs`
/// are treated alike. Paths with no extension, or with an extension that is
/// not valid UTF-8, are rejected.
fn is_code_file(path: &Path) -> bool {
    /// Lower-case extensions of the files that get indexed.
    const INDEXED_EXTENSIONS: &[&str] = &[
        "ts", "tsx", "js", "jsx", "mjs", "cjs", "py", "pyi", "rs", "go", "java", "kt", "kts",
        "scala", "rb", "php", "c", "h", "cpp", "cc", "cxx", "hpp", "cs", "swift", "ex", "exs",
        "erl", "hrl", "hs", "ml", "mli", "lua", "sh", "bash", "zsh", "sql", "vue", "svelte",
        "md", "yaml", "yml", "toml", "json",
    ];

    let lowered = match path.extension().and_then(|raw| raw.to_str()) {
        Some(text) => text.to_lowercase(),
        // No usable extension can never be on the allow-list.
        None => return false,
    };

    INDEXED_EXTENSIONS.contains(&lowered.as_str())
}
100
/// Map a file's extension to a language name.
///
/// The extension is lower-cased before lookup. Returns `"unknown"` when the
/// path has no extension, the extension is not valid UTF-8, or it is not in
/// the table below.
pub fn detect_language(path: &Path) -> String {
    /// `(extension, language)` pairs; extensions are stored lower-case.
    const LANGUAGE_BY_EXTENSION: &[(&str, &str)] = &[
        ("ts", "typescript"),
        ("tsx", "typescript"),
        ("js", "javascript"),
        ("jsx", "javascript"),
        ("mjs", "javascript"),
        ("cjs", "javascript"),
        ("py", "python"),
        ("pyi", "python"),
        ("rs", "rust"),
        ("go", "go"),
        ("java", "java"),
        ("kt", "kotlin"),
        ("kts", "kotlin"),
        ("scala", "scala"),
        ("rb", "ruby"),
        ("php", "php"),
        ("c", "c"),
        ("h", "c"),
        ("cpp", "cpp"),
        ("cc", "cpp"),
        ("cxx", "cpp"),
        ("hpp", "cpp"),
        ("cs", "csharp"),
        ("swift", "swift"),
        ("ex", "elixir"),
        ("exs", "elixir"),
        ("erl", "erlang"),
        ("hrl", "erlang"),
        ("hs", "haskell"),
        ("ml", "ocaml"),
        ("mli", "ocaml"),
        ("lua", "lua"),
        ("sh", "shell"),
        ("bash", "shell"),
        ("zsh", "shell"),
        ("sql", "sql"),
        ("vue", "vue"),
        ("svelte", "svelte"),
        ("md", "markdown"),
        ("yaml", "yaml"),
        ("yml", "yaml"),
        ("toml", "toml"),
        ("json", "json"),
    ];

    // A missing or non-UTF-8 extension becomes the empty string, which matches nothing.
    let lowered = path
        .extension()
        .and_then(|raw| raw.to_str())
        .map(str::to_lowercase)
        .unwrap_or_default();

    LANGUAGE_BY_EXTENSION
        .iter()
        .find(|(candidate, _)| *candidate == lowered.as_str())
        .map_or("unknown", |&(_, language)| language)
        .to_string()
}