1use std::path::{Path, PathBuf};
2use std::collections::HashMap;
3use std::fs::File;
4use std::io::{BufWriter, BufReader};
5use fst::{IntoStreamer, Streamer, Map, MapBuilder};
6use anyhow::{Result, Context};
7use memmap2::Mmap;
8
9#[derive(Debug)]
11pub struct PathFSTIndex {
12 path_map: Option<Map<Mmap>>,
14 handle_to_path: HashMap<u32, PathBuf>,
16}
17
18impl PathFSTIndex {
19 pub fn new() -> Self {
21 Self {
22 path_map: None,
23 handle_to_path: HashMap::new(),
24 }
25 }
26
27 pub fn build_from_paths<P: AsRef<Path>>(
29 paths: HashMap<PathBuf, u32>,
30 output_path: P,
31 ) -> Result<Self> {
32 let mut sorted_paths: Vec<_> = paths.iter().collect();
34 sorted_paths.sort_by(|a, b| a.0.cmp(b.0));
35
36 let file = File::create(&output_path)?;
38 let mut builder = MapBuilder::new(BufWriter::new(file))?;
39
40 let mut handle_to_path = HashMap::new();
41
42 for (path, &handle) in sorted_paths {
43 let path_str = path.to_string_lossy();
44 let path_bytes = path_str.as_bytes();
45
46 builder.insert(path_bytes, handle as u64)?;
47 handle_to_path.insert(handle, path.clone());
48 }
49
50 builder.finish()?;
51
52 let file = File::open(&output_path)?;
54 let mmap = unsafe { memmap2::Mmap::map(&file)? };
55 let path_map = Map::new(mmap)?;
56
57 Ok(Self {
58 path_map: Some(path_map),
59 handle_to_path,
60 })
61 }
62
63 pub fn load_from_file<P: AsRef<Path>>(
65 fst_path: P,
66 json_path: P,
67 ) -> Result<Self> {
68 let file = File::open(&fst_path)?;
70 let mmap = unsafe { memmap2::Mmap::map(&file)? };
71 let path_map = Map::new(mmap)?;
72
73 let file = File::open(&json_path)?;
75 let reader = BufReader::new(file);
76 let handle_to_path: HashMap<u32, PathBuf> = serde_json::from_reader(reader)
77 .context("Failed to parse handle-to-path mapping")?;
78
79 Ok(Self {
80 path_map: Some(path_map),
81 handle_to_path,
82 })
83 }
84
85 pub fn save_reverse_mapping<P: AsRef<Path>>(&self, path: P) -> Result<()> {
87 let file = File::create(path)?;
88 let writer = BufWriter::new(file);
89 serde_json::to_writer_pretty(writer, &self.handle_to_path)?;
90 Ok(())
91 }
92
93 pub fn get_handle(&self, path: &Path) -> Option<u32> {
95 let path_str = path.to_string_lossy();
96 self.path_map.as_ref()?
97 .get(path_str.as_bytes())
98 .map(|h| h as u32)
99 }
100
101 pub fn get_path(&self, handle: u32) -> Option<&PathBuf> {
103 self.handle_to_path.get(&handle)
104 }
105
106 pub fn find_by_prefix(&self, prefix: &str) -> Vec<(PathBuf, u32)> {
108 let mut results = Vec::new();
109
110 if let Some(ref path_map) = self.path_map {
111 let prefix_bytes = prefix.as_bytes();
112 let mut stream = path_map.range().ge(prefix_bytes).into_stream();
113
114 while let Some((path_bytes, handle)) = stream.next() {
115 if !path_bytes.starts_with(prefix_bytes) {
116 break; }
118
119 if let Ok(path_str) = std::str::from_utf8(path_bytes) {
120 let path = PathBuf::from(path_str);
121 results.push((path, handle as u32));
122 }
123 }
124 }
125
126 results
127 }
128
129 pub fn find_by_glob(&self, pattern: &str) -> Result<Vec<(PathBuf, u32)>> {
131 let glob = globset::Glob::new(pattern)?;
132 let matcher = glob.compile_matcher();
133
134 let mut results = Vec::new();
135
136 if let Some(prefix) = extract_prefix(pattern) {
138 let candidates = self.find_by_prefix(&prefix);
139 for (path, handle) in candidates {
140 if matcher.is_match(&path) {
141 results.push((path, handle));
142 }
143 }
144 } else if let Some(ref path_map) = self.path_map {
145 let mut stream = path_map.into_stream();
147 while let Some((path_bytes, handle)) = stream.next() {
148 if let Ok(path_str) = std::str::from_utf8(path_bytes) {
149 let path = PathBuf::from(path_str);
150 if matcher.is_match(&path) {
151 results.push((path, handle as u32));
152 }
153 }
154 }
155 }
156
157 Ok(results)
158 }
159
160 pub fn len(&self) -> usize {
162 self.handle_to_path.len()
163 }
164
165 pub fn is_empty(&self) -> bool {
167 self.handle_to_path.is_empty()
168 }
169}
170
171fn extract_prefix(pattern: &str) -> Option<String> {
175 let mut prefix = String::new();
176 let chars: Vec<char> = pattern.chars().collect();
177
178 for &ch in &chars {
179 match ch {
180 '*' | '?' | '[' | '{' => break, _ => prefix.push(ch),
182 }
183 }
184
185 if prefix.contains('/') && !prefix.is_empty() {
187 if let Some(pos) = prefix.rfind('/') {
189 Some(prefix[..=pos].to_string())
190 } else {
191 None
192 }
193 } else {
194 None
195 }
196}
197
198#[cfg(test)]
199mod tests {
200 use super::*;
201 use tempfile::TempDir;
202
203 #[test]
204 fn test_extract_prefix() {
205 assert_eq!(extract_prefix("src/**/*.rs"), Some("src/".to_string()));
206 assert_eq!(extract_prefix("src/main.rs"), Some("src/".to_string()));
207 assert_eq!(extract_prefix("**/*.rs"), None);
208 assert_eq!(extract_prefix("*.rs"), None);
209 assert_eq!(extract_prefix("tests/unit/*.rs"), Some("tests/unit/".to_string()));
210 }
211
212 #[test]
213 fn test_fst_index_basic() -> Result<()> {
214 let temp_dir = TempDir::new()?;
215 let fst_path = temp_dir.path().join("paths.fst");
216
217 let mut paths = HashMap::new();
218 paths.insert(PathBuf::from("src/main.rs"), 1);
219 paths.insert(PathBuf::from("src/lib.rs"), 2);
220 paths.insert(PathBuf::from("tests/test.rs"), 3);
221
222 let index = PathFSTIndex::build_from_paths(paths, &fst_path)?;
223
224 assert_eq!(index.get_handle(&PathBuf::from("src/main.rs")), Some(1));
226 assert_eq!(index.get_handle(&PathBuf::from("nonexistent")), None);
227
228 let src_files = index.find_by_prefix("src/");
230 assert_eq!(src_files.len(), 2);
231
232 Ok(())
233 }
234}