rust_assistant/
cache.rs

//! The `cache` module.
//!
//! This module provides caching facilities that improve performance and avoid
//! redundant work, particularly when downloading and storing crate data.
//! It defines `CrateTar` (a raw crate tarball), `Crate` (an indexed in-memory view of a
//! crate's files) and `CrateCache` (an LRU cache of `Crate` values keyed by crate version).
//!
8use crate::search::{SearchIndex, SearchIndexBuilder};
9use crate::{
10    CrateVersion, Directory, DirectoryMut, FileLineRange, Item, ItemQuery, Line, LineQuery,
11    SearchMode,
12};
13use bytes::{Bytes, BytesMut};
14use fnv::FnvHashMap;
15use lru::LruCache;
16use parking_lot::Mutex;
17use regex::RegexBuilder;
18use std::collections::BTreeSet;
19use std::io::{BufRead, Cursor, Read};
20use std::num::NonZeroUsize;
21use std::ops::{Bound, Range, RangeBounds};
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24use tar::EntryType;
25
26/// Represents a tarball of a crate, including version information and tar data.
27#[derive(Clone)]
28pub struct CrateTar {
29    pub crate_version: CrateVersion,
30    pub tar_data: Vec<u8>,
31}
32
33impl<C, D> From<(C, D)> for CrateTar
34where
35    C: Into<CrateVersion>,
36    D: Into<Vec<u8>>,
37{
38    fn from((c, d): (C, D)) -> Self {
39        CrateTar {
40            crate_version: c.into(),
41            tar_data: d.into(),
42        }
43    }
44}
45
46impl CrateTar {
47    /// Retrieves the content of a specified file within the crate tarball.
48    ///
49    pub fn get_file(&self, file: &str) -> anyhow::Result<Option<String>> {
50        let mut archive = tar::Archive::new(self.tar_data.as_slice());
51        let entries = archive.entries()?;
52        for entry in entries {
53            let Ok(mut entry) = entry else {
54                continue;
55            };
56
57            let Ok(path) = entry.path() else {
58                continue;
59            };
60
61            if self.crate_version.root_dir().join(file).eq(path.as_ref()) {
62                let mut content = String::with_capacity(entry.size() as usize);
63                entry.read_to_string(&mut content)?;
64                return Ok(Some(content));
65            }
66        }
67
68        Ok(None)
69    }
70
71    /// Retrieves the content of a specified file within a range.
72    ///
73    pub fn get_file_by_range(
74        &self,
75        file: &str,
76        start: impl Into<Option<NonZeroUsize>>,
77        end: impl Into<Option<NonZeroUsize>>,
78    ) -> anyhow::Result<Option<String>> {
79        let mut archive = tar::Archive::new(self.tar_data.as_slice());
80        let entries = archive.entries()?;
81        for entry in entries {
82            let Ok(mut entry) = entry else {
83                continue;
84            };
85
86            let Ok(path) = entry.path() else {
87                continue;
88            };
89
90            if self.crate_version.root_dir().join(file).eq(path.as_ref()) {
91                let mut content = String::with_capacity(entry.size() as usize);
92                entry.read_to_string(&mut content)?;
93                let lines: Vec<&str> = content.lines().collect();
94
95                let start = start.into();
96                let end = end.into();
97
98                let start_line = start.map_or(0, |n| n.get() - 1);
99                let end_line = end.map_or(lines.len(), |n| n.get());
100
101                if start_line > lines.len() {
102                    return Ok(Some(String::new()));
103                }
104
105                return Ok(Some(
106                    lines[start_line..end_line.min(lines.len())].join("\n"),
107                ));
108            }
109        }
110
111        Ok(None)
112    }
113
114    /// Lists all files in the crate within a specified range.
115    ///
116    pub fn get_all_file_list(
117        &self,
118        range: impl RangeBounds<usize>,
119    ) -> anyhow::Result<Option<BTreeSet<PathBuf>>> {
120        let mut archive = tar::Archive::new(self.tar_data.as_slice());
121        let root_dir = self.crate_version.root_dir();
122        let entries = archive.entries()?;
123        let mut list = BTreeSet::default();
124        for (i, entry) in entries.enumerate() {
125            if !range.contains(&i) {
126                continue;
127            }
128            let Ok(entry) = entry else {
129                continue;
130            };
131
132            let Ok(path) = entry.path() else {
133                continue;
134            };
135
136            let Ok(path) = path.strip_prefix(&root_dir) else {
137                continue;
138            };
139            list.insert(path.to_path_buf());
140        }
141        Ok(Some(list))
142    }
143
144    /// Reads the contents of a directory within the crate.
145    ///
146    pub fn read_directory<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<Option<Directory>> {
147        let mut archive = tar::Archive::new(self.tar_data.as_slice());
148        let base_dir = self.crate_version.root_dir().join(path);
149        let entries = archive.entries()?;
150        let mut dir = DirectoryMut::default();
151        for entry in entries {
152            let Ok(entry) = entry else {
153                continue;
154            };
155
156            let Ok(path) = entry.path() else {
157                continue;
158            };
159
160            let Ok(path) = path.strip_prefix(&base_dir) else {
161                continue;
162            };
163
164            let mut components = path.components();
165            if let Some(path) = components
166                .next()
167                .map(|comp| PathBuf::from(comp.as_os_str()))
168            {
169                if components.next().is_none() {
170                    dir.files.insert(path.to_path_buf());
171                } else {
172                    dir.directories.insert(path.to_path_buf());
173                }
174            }
175        }
176
177        Ok(Some(dir.freeze()))
178    }
179}
180
/// The text encoding class of a file stored in a crate.
///
/// Files are only distinguished by whether their bytes form valid UTF-8.
/// The default is [`FileDataType::NonUtf8`].
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileDataType {
    /// The file content is valid UTF-8.
    Utf8,
    /// The file content is not valid UTF-8.
    #[default]
    NonUtf8,
}
192
193/// Describes a crate file with its data type and range in the crate's data buffer.
194///
195/// This struct is used to quickly access the file's content and its encoding format.
196///
197#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
198pub struct CrateFileDataDesc {
199    /// The data type of the file (UTF-8 or Non-UTF-8).
200    pub data_type: FileDataType,
201    /// The byte range of the file content within the crate's data buffer.
202    pub range: Range<usize>,
203}
204
205/// Contains the actual content of a file within a crate.
206///
207/// This struct holds the file data and its data type, which is useful for encoding-specific operations.
208#[derive(Debug, Clone)]
209pub struct FileContent {
210    /// The data type of the file.
211    pub data_type: FileDataType,
212    /// The byte content of the file.
213    pub data: Bytes,
214}
215
216impl From<Bytes> for FileContent {
217    fn from(data: Bytes) -> Self {
218        FileContent {
219            data_type: if std::str::from_utf8(data.as_ref()).is_ok() {
220                FileDataType::Utf8
221            } else {
222                FileDataType::NonUtf8
223            },
224            data,
225        }
226    }
227}
228
229/// Represents a crate with its data and indexes for quick access to its contents.
230///
231/// This struct stores the complete data of a crate and provides indexes for accessing individual files,
232/// directories, and search functionalities within the crate.
233///
234#[derive(Debug, Clone)]
235pub struct Crate {
236    data: Bytes,
237    files_index: Arc<FnvHashMap<PathBuf, CrateFileDataDesc>>,
238    directories_index: Arc<FnvHashMap<PathBuf, Directory>>,
239    item_search_index: SearchIndex,
240}
241
242impl Crate {
243    /// Retrieves the content of a file by specifying a line range.
244    ///
245    pub fn get_file_by_file_line_range<P: AsRef<Path>>(
246        &self,
247        file: P,
248        FileLineRange { start, end }: FileLineRange,
249    ) -> anyhow::Result<Option<FileContent>> {
250        match (start, end) {
251            (Some(start), Some(end)) => self.get_file_by_line_range(file, start..=end),
252            (Some(start), None) => self.get_file_by_line_range(file, start..),
253            (None, Some(end)) => self.get_file_by_line_range(file, ..=end),
254            (None, None) => self.get_file_by_line_range(file, ..),
255        }
256    }
257
258    /// Retrieves the content of a file by specifying a line range.
259    ///
260    /// This method is used to extract a specific range of lines from a file in the crate.
261    ///
262    pub fn get_file_by_line_range<P: AsRef<Path>>(
263        &self,
264        file: P,
265        line_range: impl RangeBounds<NonZeroUsize>,
266    ) -> anyhow::Result<Option<FileContent>> {
267        let file = file.as_ref();
268        let Some(CrateFileDataDesc { range, data_type }) = self.files_index.get(file) else {
269            return Ok(None);
270        };
271
272        let data = self.data.slice(range.clone());
273
274        if matches!(
275            (line_range.start_bound(), line_range.end_bound()),
276            (Bound::Unbounded, Bound::Unbounded)
277        ) {
278            return Ok(Some(FileContent {
279                data,
280                data_type: *data_type,
281            }));
282        }
283
284        if let FileDataType::NonUtf8 = data_type {
285            anyhow::bail!("Non-UTF8 formatted files do not support line-range querying.");
286        }
287
288        let s = std::str::from_utf8(data.as_ref())?;
289        let start_line = match line_range.start_bound() {
290            Bound::Included(n) => n.get() - 1,
291            Bound::Excluded(n) => n.get(),
292            Bound::Unbounded => 0,
293        };
294        let end_line = match line_range.end_bound() {
295            Bound::Included(n) => n.get(),
296            Bound::Excluded(n) => n.get() - 1,
297            Bound::Unbounded => usize::MAX,
298        };
299
300        let mut line_start = 0;
301        let mut line_end = s.len();
302        let mut current_line = 0;
303
304        // 定位起始行的开始
305        for _ in 0..start_line {
306            if let Some(pos) = s[line_start..].find('\n') {
307                line_start += pos + 1;
308                current_line += 1;
309            } else {
310                // 找不到更多的行
311                break;
312            }
313        }
314
315        // 定位结束行的结束
316        if current_line < end_line {
317            line_end = line_start;
318            for _ in current_line..end_line {
319                if let Some(pos) = s[line_end..].find('\n') {
320                    line_end += pos + 1;
321                } else {
322                    break;
323                }
324            }
325        }
326
327        if line_start < line_end {
328            let line_bytes_range = range.start + line_start..range.start + line_end;
329            return Ok(Some(FileContent {
330                data_type: FileDataType::Utf8,
331                data: self.data.slice(line_bytes_range),
332            }));
333        }
334
335        Ok(None)
336    }
337
338    /// Reads the content of a specified directory within the crate.
339    ///
340    pub fn read_directory<P: AsRef<Path>>(&self, path: P) -> Option<&Directory> {
341        self.directories_index.get(path.as_ref())
342    }
343
344    /// Searches for items in the crate based on a given query.
345    ///
346    pub fn search_item(&self, query: &ItemQuery) -> Vec<Item> {
347        self.item_search_index.search(query)
348    }
349
350    /// Searches for lines in the crate's files based on a given query.
351    ///
352    pub fn search_line(&self, query: &LineQuery) -> anyhow::Result<Vec<Line>> {
353        let mut results = Vec::new();
354        let file_ext = query
355            .file_ext
356            .split(",")
357            .map(|s| s.trim())
358            .filter(|s| !s.is_empty())
359            .collect::<Vec<_>>();
360
361        let mut regex_pattern = match query.mode {
362            SearchMode::PlainText => regex::escape(&query.query),
363            SearchMode::Regex => query.query.clone(),
364        };
365
366        // 如果需要全字匹配,则对模式进行相应包装
367        if query.whole_word {
368            regex_pattern = format!(r"\b{}\b", regex_pattern);
369        }
370
371        // 创建正则表达式,考虑大小写敏感设置
372        let pattern = RegexBuilder::new(&regex_pattern)
373            .case_insensitive(!query.case_sensitive)
374            .build()?;
375
376        for (path, file_desc) in self.files_index.iter() {
377            if let Some(query_path) = &query.path {
378                if !path.starts_with(query_path) {
379                    continue;
380                }
381            };
382            if !file_ext.is_empty() {
383                if let Some(extension) = path.extension() {
384                    if !file_ext
385                        .iter()
386                        .any(|ext| extension.eq_ignore_ascii_case(ext))
387                    {
388                        continue;
389                    }
390                } else {
391                    // 如果路径没有扩展名,则跳过
392                    continue;
393                }
394            }
395
396            let content_range = file_desc.range.clone();
397            let content = &self.data.slice(content_range);
398
399            let cursor = Cursor::new(content);
400
401            for (line_number, line) in cursor.lines().enumerate() {
402                let line = line?;
403                let Some(line_number) = NonZeroUsize::new(line_number + 1) else {
404                    continue;
405                };
406
407                // 使用 pattern 对每一行进行匹配
408                if let Some(mat) = pattern.find(&line) {
409                    let column_range = NonZeroUsize::new(mat.start() + 1).unwrap()
410                        ..NonZeroUsize::new(mat.end() + 1).unwrap();
411
412                    let line_result = Line {
413                        line,
414                        file: path.clone(),
415                        line_number,
416                        column_range,
417                    };
418                    results.push(line_result);
419
420                    if let Some(max_results) = query.max_results {
421                        if results.len() >= max_results.get() {
422                            break;
423                        }
424                    }
425                }
426            }
427
428            if let Some(max_results) = query.max_results {
429                if results.len() >= max_results.get() {
430                    break;
431                }
432            }
433        }
434
435        Ok(results)
436    }
437}
438
439impl TryFrom<CrateTar> for Crate {
440    type Error = std::io::Error;
441    fn try_from(crate_tar: CrateTar) -> std::io::Result<Self> {
442        let mut archive = tar::Archive::new(crate_tar.tar_data.as_slice());
443        let root_dir = crate_tar.crate_version.root_dir();
444
445        let mut data = BytesMut::new();
446        let mut files_index = FnvHashMap::default();
447        let mut directories_index = FnvHashMap::default();
448        let mut search_index_builder = SearchIndexBuilder::default();
449
450        let mut buffer = Vec::new();
451        let entries = archive.entries()?;
452        for entry in entries {
453            let Ok(mut entry) = entry else {
454                continue;
455            };
456
457            let Ok(path) = entry.path() else {
458                continue;
459            };
460
461            let Ok(path) = path.strip_prefix(&root_dir) else {
462                continue;
463            };
464
465            let Some(last) = path.components().last() else {
466                continue;
467            };
468
469            let filename = PathBuf::from(last.as_os_str());
470            let is_rust_src =
471                matches!(filename.extension(), Some(ext) if ext.eq_ignore_ascii_case("rs"));
472
473            let path = path.to_path_buf();
474            if let EntryType::Regular = entry.header().entry_type() {
475                buffer.clear();
476                entry.read_to_end(&mut buffer)?;
477
478                let data_type = match std::str::from_utf8(&buffer) {
479                    Ok(utf8_src) => {
480                        if is_rust_src {
481                            search_index_builder.update(path.as_path(), utf8_src);
482                        }
483                        FileDataType::Utf8
484                    }
485                    Err(_) => FileDataType::NonUtf8,
486                };
487
488                let range = data.len()..data.len() + buffer.len();
489
490                data.extend_from_slice(buffer.as_slice());
491                files_index.insert(path.clone(), CrateFileDataDesc { data_type, range });
492                let parent = path.parent().map(|p| p.to_path_buf()).unwrap_or_default();
493                directories_index
494                    .entry(parent)
495                    .and_modify(|o: &mut DirectoryMut| {
496                        o.files.insert(filename.clone());
497                    })
498                    .or_insert({
499                        let mut set = BTreeSet::default();
500                        set.insert(filename);
501                        DirectoryMut {
502                            files: set,
503                            directories: Default::default(),
504                        }
505                    });
506            }
507        }
508
509        let mut subdirectories_index = FnvHashMap::default();
510        for key in directories_index.keys() {
511            let Some(last) = key.components().last() else {
512                continue;
513            };
514
515            let sub_dir_name = PathBuf::from(last.as_os_str());
516            let parent = key.parent().map(|p| p.to_path_buf()).unwrap_or_default();
517            subdirectories_index
518                .entry(parent)
519                .and_modify(|s: &mut BTreeSet<PathBuf>| {
520                    s.insert(sub_dir_name.clone());
521                })
522                .or_insert({
523                    let mut set = BTreeSet::default();
524                    set.insert(sub_dir_name);
525                    set
526                });
527        }
528
529        for (k, directories) in subdirectories_index {
530            directories_index
531                .entry(k)
532                .and_modify(|directory: &mut DirectoryMut| {
533                    directory.directories = directories.clone();
534                })
535                .or_insert(DirectoryMut {
536                    files: Default::default(),
537                    directories,
538                });
539        }
540
541        let directories_index = directories_index
542            .into_iter()
543            .map(|(k, v)| (k, v.freeze()))
544            .collect();
545
546        Ok(Self {
547            data: data.freeze(),
548            files_index: Arc::new(files_index),
549            directories_index: Arc::new(directories_index),
550            item_search_index: search_index_builder.finish(),
551        })
552    }
553}
554
555/// A cache for storing and retrieving `Crate` instances to minimize redundant operations.
556///
557/// This cache uses a least-recently-used (LRU) strategy and is thread-safe.
558#[derive(Clone)]
559pub struct CrateCache {
560    lru: Arc<Mutex<LruCache<CrateVersion, Crate, fnv::FnvBuildHasher>>>,
561}
562
563impl Default for CrateCache {
564    fn default() -> Self {
565        Self::new(unsafe { NonZeroUsize::new_unchecked(2048) })
566    }
567}
568
569impl CrateCache {
570    /// Creates a new `CrateCache` with a specified capacity.
571    ///
572    pub fn new(capacity: NonZeroUsize) -> Self {
573        CrateCache {
574            lru: Arc::new(Mutex::new(LruCache::with_hasher(
575                capacity,
576                fnv::FnvBuildHasher::default(),
577            ))),
578        }
579    }
580
581    /// Retrieves a crate from the cache if it exists.
582    ///
583    pub fn get_crate(&self, crate_version: &CrateVersion) -> Option<Crate> {
584        self.lru.lock().get(crate_version).cloned()
585    }
586
587    /// Inserts or updates a crate in the cache.
588    ///
589    pub fn set_crate(
590        &self,
591        crate_version: impl Into<CrateVersion>,
592        krate: impl Into<Crate>,
593    ) -> Option<Crate> {
594        self.lru.lock().put(crate_version.into(), krate.into())
595    }
596}