1use crate::search::{SearchIndex, SearchIndexBuilder};
9use crate::{
10 CrateVersion, Directory, DirectoryMut, FileLineRange, Item, ItemQuery, Line, LineQuery,
11 SearchMode,
12};
13use bytes::{Bytes, BytesMut};
14use fnv::FnvHashMap;
15use lru::LruCache;
16use parking_lot::Mutex;
17use regex::RegexBuilder;
18use std::collections::BTreeSet;
19use std::io::{BufRead, Cursor, Read};
20use std::num::NonZeroUsize;
21use std::ops::{Bound, Range, RangeBounds};
22use std::path::{Path, PathBuf};
23use std::sync::Arc;
24use tar::EntryType;
25
26#[derive(Clone)]
28pub struct CrateTar {
29 pub crate_version: CrateVersion,
30 pub tar_data: Vec<u8>,
31}
32
33impl<C, D> From<(C, D)> for CrateTar
34where
35 C: Into<CrateVersion>,
36 D: Into<Vec<u8>>,
37{
38 fn from((c, d): (C, D)) -> Self {
39 CrateTar {
40 crate_version: c.into(),
41 tar_data: d.into(),
42 }
43 }
44}
45
46impl CrateTar {
47 pub fn get_file(&self, file: &str) -> anyhow::Result<Option<String>> {
50 let mut archive = tar::Archive::new(self.tar_data.as_slice());
51 let entries = archive.entries()?;
52 for entry in entries {
53 let Ok(mut entry) = entry else {
54 continue;
55 };
56
57 let Ok(path) = entry.path() else {
58 continue;
59 };
60
61 if self.crate_version.root_dir().join(file).eq(path.as_ref()) {
62 let mut content = String::with_capacity(entry.size() as usize);
63 entry.read_to_string(&mut content)?;
64 return Ok(Some(content));
65 }
66 }
67
68 Ok(None)
69 }
70
71 pub fn get_file_by_range(
74 &self,
75 file: &str,
76 start: impl Into<Option<NonZeroUsize>>,
77 end: impl Into<Option<NonZeroUsize>>,
78 ) -> anyhow::Result<Option<String>> {
79 let mut archive = tar::Archive::new(self.tar_data.as_slice());
80 let entries = archive.entries()?;
81 for entry in entries {
82 let Ok(mut entry) = entry else {
83 continue;
84 };
85
86 let Ok(path) = entry.path() else {
87 continue;
88 };
89
90 if self.crate_version.root_dir().join(file).eq(path.as_ref()) {
91 let mut content = String::with_capacity(entry.size() as usize);
92 entry.read_to_string(&mut content)?;
93 let lines: Vec<&str> = content.lines().collect();
94
95 let start = start.into();
96 let end = end.into();
97
98 let start_line = start.map_or(0, |n| n.get() - 1);
99 let end_line = end.map_or(lines.len(), |n| n.get());
100
101 if start_line > lines.len() {
102 return Ok(Some(String::new()));
103 }
104
105 return Ok(Some(
106 lines[start_line..end_line.min(lines.len())].join("\n"),
107 ));
108 }
109 }
110
111 Ok(None)
112 }
113
114 pub fn get_all_file_list(
117 &self,
118 range: impl RangeBounds<usize>,
119 ) -> anyhow::Result<Option<BTreeSet<PathBuf>>> {
120 let mut archive = tar::Archive::new(self.tar_data.as_slice());
121 let root_dir = self.crate_version.root_dir();
122 let entries = archive.entries()?;
123 let mut list = BTreeSet::default();
124 for (i, entry) in entries.enumerate() {
125 if !range.contains(&i) {
126 continue;
127 }
128 let Ok(entry) = entry else {
129 continue;
130 };
131
132 let Ok(path) = entry.path() else {
133 continue;
134 };
135
136 let Ok(path) = path.strip_prefix(&root_dir) else {
137 continue;
138 };
139 list.insert(path.to_path_buf());
140 }
141 Ok(Some(list))
142 }
143
144 pub fn read_directory<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<Option<Directory>> {
147 let mut archive = tar::Archive::new(self.tar_data.as_slice());
148 let base_dir = self.crate_version.root_dir().join(path);
149 let entries = archive.entries()?;
150 let mut dir = DirectoryMut::default();
151 for entry in entries {
152 let Ok(entry) = entry else {
153 continue;
154 };
155
156 let Ok(path) = entry.path() else {
157 continue;
158 };
159
160 let Ok(path) = path.strip_prefix(&base_dir) else {
161 continue;
162 };
163
164 let mut components = path.components();
165 if let Some(path) = components
166 .next()
167 .map(|comp| PathBuf::from(comp.as_os_str()))
168 {
169 if components.next().is_none() {
170 dir.files.insert(path.to_path_buf());
171 } else {
172 dir.directories.insert(path.to_path_buf());
173 }
174 }
175 }
176
177 Ok(Some(dir.freeze()))
178 }
179}
180
/// Whether a file's bytes form valid UTF-8 text.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum FileDataType {
    /// The content is valid UTF-8.
    Utf8,
    /// The content is not valid UTF-8. This is the default.
    #[default]
    NonUtf8,
}
192
193#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
198pub struct CrateFileDataDesc {
199 pub data_type: FileDataType,
201 pub range: Range<usize>,
203}
204
205#[derive(Debug, Clone)]
209pub struct FileContent {
210 pub data_type: FileDataType,
212 pub data: Bytes,
214}
215
216impl From<Bytes> for FileContent {
217 fn from(data: Bytes) -> Self {
218 FileContent {
219 data_type: if std::str::from_utf8(data.as_ref()).is_ok() {
220 FileDataType::Utf8
221 } else {
222 FileDataType::NonUtf8
223 },
224 data,
225 }
226 }
227}
228
229#[derive(Debug, Clone)]
235pub struct Crate {
236 data: Bytes,
237 files_index: Arc<FnvHashMap<PathBuf, CrateFileDataDesc>>,
238 directories_index: Arc<FnvHashMap<PathBuf, Directory>>,
239 item_search_index: SearchIndex,
240}
241
242impl Crate {
243 pub fn get_file_by_file_line_range<P: AsRef<Path>>(
246 &self,
247 file: P,
248 FileLineRange { start, end }: FileLineRange,
249 ) -> anyhow::Result<Option<FileContent>> {
250 match (start, end) {
251 (Some(start), Some(end)) => self.get_file_by_line_range(file, start..=end),
252 (Some(start), None) => self.get_file_by_line_range(file, start..),
253 (None, Some(end)) => self.get_file_by_line_range(file, ..=end),
254 (None, None) => self.get_file_by_line_range(file, ..),
255 }
256 }
257
258 pub fn get_file_by_line_range<P: AsRef<Path>>(
263 &self,
264 file: P,
265 line_range: impl RangeBounds<NonZeroUsize>,
266 ) -> anyhow::Result<Option<FileContent>> {
267 let file = file.as_ref();
268 let Some(CrateFileDataDesc { range, data_type }) = self.files_index.get(file) else {
269 return Ok(None);
270 };
271
272 let data = self.data.slice(range.clone());
273
274 if matches!(
275 (line_range.start_bound(), line_range.end_bound()),
276 (Bound::Unbounded, Bound::Unbounded)
277 ) {
278 return Ok(Some(FileContent {
279 data,
280 data_type: *data_type,
281 }));
282 }
283
284 if let FileDataType::NonUtf8 = data_type {
285 anyhow::bail!("Non-UTF8 formatted files do not support line-range querying.");
286 }
287
288 let s = std::str::from_utf8(data.as_ref())?;
289 let start_line = match line_range.start_bound() {
290 Bound::Included(n) => n.get() - 1,
291 Bound::Excluded(n) => n.get(),
292 Bound::Unbounded => 0,
293 };
294 let end_line = match line_range.end_bound() {
295 Bound::Included(n) => n.get(),
296 Bound::Excluded(n) => n.get() - 1,
297 Bound::Unbounded => usize::MAX,
298 };
299
300 let mut line_start = 0;
301 let mut line_end = s.len();
302 let mut current_line = 0;
303
304 for _ in 0..start_line {
306 if let Some(pos) = s[line_start..].find('\n') {
307 line_start += pos + 1;
308 current_line += 1;
309 } else {
310 break;
312 }
313 }
314
315 if current_line < end_line {
317 line_end = line_start;
318 for _ in current_line..end_line {
319 if let Some(pos) = s[line_end..].find('\n') {
320 line_end += pos + 1;
321 } else {
322 break;
323 }
324 }
325 }
326
327 if line_start < line_end {
328 let line_bytes_range = range.start + line_start..range.start + line_end;
329 return Ok(Some(FileContent {
330 data_type: FileDataType::Utf8,
331 data: self.data.slice(line_bytes_range),
332 }));
333 }
334
335 Ok(None)
336 }
337
338 pub fn read_directory<P: AsRef<Path>>(&self, path: P) -> Option<&Directory> {
341 self.directories_index.get(path.as_ref())
342 }
343
344 pub fn search_item(&self, query: &ItemQuery) -> Vec<Item> {
347 self.item_search_index.search(query)
348 }
349
350 pub fn search_line(&self, query: &LineQuery) -> anyhow::Result<Vec<Line>> {
353 let mut results = Vec::new();
354 let file_ext = query
355 .file_ext
356 .split(",")
357 .map(|s| s.trim())
358 .filter(|s| !s.is_empty())
359 .collect::<Vec<_>>();
360
361 let mut regex_pattern = match query.mode {
362 SearchMode::PlainText => regex::escape(&query.query),
363 SearchMode::Regex => query.query.clone(),
364 };
365
366 if query.whole_word {
368 regex_pattern = format!(r"\b{}\b", regex_pattern);
369 }
370
371 let pattern = RegexBuilder::new(®ex_pattern)
373 .case_insensitive(!query.case_sensitive)
374 .build()?;
375
376 for (path, file_desc) in self.files_index.iter() {
377 if let Some(query_path) = &query.path {
378 if !path.starts_with(query_path) {
379 continue;
380 }
381 };
382 if !file_ext.is_empty() {
383 if let Some(extension) = path.extension() {
384 if !file_ext
385 .iter()
386 .any(|ext| extension.eq_ignore_ascii_case(ext))
387 {
388 continue;
389 }
390 } else {
391 continue;
393 }
394 }
395
396 let content_range = file_desc.range.clone();
397 let content = &self.data.slice(content_range);
398
399 let cursor = Cursor::new(content);
400
401 for (line_number, line) in cursor.lines().enumerate() {
402 let line = line?;
403 let Some(line_number) = NonZeroUsize::new(line_number + 1) else {
404 continue;
405 };
406
407 if let Some(mat) = pattern.find(&line) {
409 let column_range = NonZeroUsize::new(mat.start() + 1).unwrap()
410 ..NonZeroUsize::new(mat.end() + 1).unwrap();
411
412 let line_result = Line {
413 line,
414 file: path.clone(),
415 line_number,
416 column_range,
417 };
418 results.push(line_result);
419
420 if let Some(max_results) = query.max_results {
421 if results.len() >= max_results.get() {
422 break;
423 }
424 }
425 }
426 }
427
428 if let Some(max_results) = query.max_results {
429 if results.len() >= max_results.get() {
430 break;
431 }
432 }
433 }
434
435 Ok(results)
436 }
437}
438
439impl TryFrom<CrateTar> for Crate {
440 type Error = std::io::Error;
441 fn try_from(crate_tar: CrateTar) -> std::io::Result<Self> {
442 let mut archive = tar::Archive::new(crate_tar.tar_data.as_slice());
443 let root_dir = crate_tar.crate_version.root_dir();
444
445 let mut data = BytesMut::new();
446 let mut files_index = FnvHashMap::default();
447 let mut directories_index = FnvHashMap::default();
448 let mut search_index_builder = SearchIndexBuilder::default();
449
450 let mut buffer = Vec::new();
451 let entries = archive.entries()?;
452 for entry in entries {
453 let Ok(mut entry) = entry else {
454 continue;
455 };
456
457 let Ok(path) = entry.path() else {
458 continue;
459 };
460
461 let Ok(path) = path.strip_prefix(&root_dir) else {
462 continue;
463 };
464
465 let Some(last) = path.components().last() else {
466 continue;
467 };
468
469 let filename = PathBuf::from(last.as_os_str());
470 let is_rust_src =
471 matches!(filename.extension(), Some(ext) if ext.eq_ignore_ascii_case("rs"));
472
473 let path = path.to_path_buf();
474 if let EntryType::Regular = entry.header().entry_type() {
475 buffer.clear();
476 entry.read_to_end(&mut buffer)?;
477
478 let data_type = match std::str::from_utf8(&buffer) {
479 Ok(utf8_src) => {
480 if is_rust_src {
481 search_index_builder.update(path.as_path(), utf8_src);
482 }
483 FileDataType::Utf8
484 }
485 Err(_) => FileDataType::NonUtf8,
486 };
487
488 let range = data.len()..data.len() + buffer.len();
489
490 data.extend_from_slice(buffer.as_slice());
491 files_index.insert(path.clone(), CrateFileDataDesc { data_type, range });
492 let parent = path.parent().map(|p| p.to_path_buf()).unwrap_or_default();
493 directories_index
494 .entry(parent)
495 .and_modify(|o: &mut DirectoryMut| {
496 o.files.insert(filename.clone());
497 })
498 .or_insert({
499 let mut set = BTreeSet::default();
500 set.insert(filename);
501 DirectoryMut {
502 files: set,
503 directories: Default::default(),
504 }
505 });
506 }
507 }
508
509 let mut subdirectories_index = FnvHashMap::default();
510 for key in directories_index.keys() {
511 let Some(last) = key.components().last() else {
512 continue;
513 };
514
515 let sub_dir_name = PathBuf::from(last.as_os_str());
516 let parent = key.parent().map(|p| p.to_path_buf()).unwrap_or_default();
517 subdirectories_index
518 .entry(parent)
519 .and_modify(|s: &mut BTreeSet<PathBuf>| {
520 s.insert(sub_dir_name.clone());
521 })
522 .or_insert({
523 let mut set = BTreeSet::default();
524 set.insert(sub_dir_name);
525 set
526 });
527 }
528
529 for (k, directories) in subdirectories_index {
530 directories_index
531 .entry(k)
532 .and_modify(|directory: &mut DirectoryMut| {
533 directory.directories = directories.clone();
534 })
535 .or_insert(DirectoryMut {
536 files: Default::default(),
537 directories,
538 });
539 }
540
541 let directories_index = directories_index
542 .into_iter()
543 .map(|(k, v)| (k, v.freeze()))
544 .collect();
545
546 Ok(Self {
547 data: data.freeze(),
548 files_index: Arc::new(files_index),
549 directories_index: Arc::new(directories_index),
550 item_search_index: search_index_builder.finish(),
551 })
552 }
553}
554
555#[derive(Clone)]
559pub struct CrateCache {
560 lru: Arc<Mutex<LruCache<CrateVersion, Crate, fnv::FnvBuildHasher>>>,
561}
562
563impl Default for CrateCache {
564 fn default() -> Self {
565 Self::new(unsafe { NonZeroUsize::new_unchecked(2048) })
566 }
567}
568
569impl CrateCache {
570 pub fn new(capacity: NonZeroUsize) -> Self {
573 CrateCache {
574 lru: Arc::new(Mutex::new(LruCache::with_hasher(
575 capacity,
576 fnv::FnvBuildHasher::default(),
577 ))),
578 }
579 }
580
581 pub fn get_crate(&self, crate_version: &CrateVersion) -> Option<Crate> {
584 self.lru.lock().get(crate_version).cloned()
585 }
586
587 pub fn set_crate(
590 &self,
591 crate_version: impl Into<CrateVersion>,
592 krate: impl Into<Crate>,
593 ) -> Option<Crate> {
594 self.lru.lock().put(crate_version.into(), krate.into())
595 }
596}