1use anyhow::Result;
2use dashmap::DashMap;
3use hnsw_rs::prelude::*;
4use parking_lot::RwLock;
5use rayon::prelude::*;
6use rusqlite::{params, Connection};
7use serde::{Deserialize, Serialize};
8use std::fs;
9use std::path::PathBuf;
10use std::sync::Arc;
11
/// One embedded text entry, as persisted in the `vectors` table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorEntry {
    // SQLite rowid (AUTOINCREMENT primary key).
    pub id: i64,
    // The original text that was embedded.
    pub text: String,
    // The embedding vector produced for `text`.
    pub vector: Vec<f64>,
    // Name of the embedding model that produced `vector`.
    pub model: String,
    // Provider the model belongs to.
    pub provider: String,
    // Insertion timestamp (stored as RFC 3339 text in the database).
    pub created_at: chrono::DateTime<chrono::Utc>,
    // Source file the text came from, when ingested from a file.
    pub file_path: Option<String>,
    // Chunk position within the source file, when the file was chunked.
    pub chunk_index: Option<i32>,
    // Total number of chunks the source file was split into, when chunked.
    pub total_chunks: Option<i32>,
}
24
/// HNSW index over `f64` vectors using cosine distance.
type HnswIndex = Hnsw<'static, f64, DistCosine>;

/// SQLite-backed vector store with an in-memory cache and a lazily built
/// HNSW index for approximate nearest-neighbour search.
pub struct VectorDatabase {
    // Path to the backing SQLite database file.
    db_path: PathBuf,
    // Approximate-NN index; `None` until first built, rebuilt when dirty.
    hnsw_index: Arc<RwLock<Option<HnswIndex>>>,
    // In-memory copy of all entries, keyed by row id.
    vector_cache: Arc<DashMap<i64, VectorEntry>>,
    // Set to `true` whenever entries change so the index gets rebuilt.
    index_dirty: Arc<RwLock<bool>>,
}
37
// Manual Debug impl: the HNSW index is not Debug and the full vector cache
// would be far too verbose, so only summary fields are printed.
impl std::fmt::Debug for VectorDatabase {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("VectorDatabase")
            .field("db_path", &self.db_path)
            .field("vector_cache_len", &self.vector_cache.len())
            .field("index_dirty", &self.index_dirty)
            .finish()
    }
}
47
48impl VectorDatabase {
49 pub fn new(name: &str) -> Result<Self> {
50 let embeddings_dir = Self::embeddings_dir()?;
51 fs::create_dir_all(&embeddings_dir)?;
52
53 let db_path = embeddings_dir.join(format!("{}.db", name));
54
55 let db = Self {
56 db_path,
57 hnsw_index: Arc::new(RwLock::new(None)),
58 vector_cache: Arc::new(DashMap::new()),
59 index_dirty: Arc::new(RwLock::new(true)),
60 };
61
62 db.initialize()?;
63 Ok(db)
64 }
65
66 pub fn embeddings_dir() -> Result<PathBuf> {
67 let home_dir =
68 dirs::home_dir().ok_or_else(|| anyhow::anyhow!("Could not find home directory"))?;
69 Ok(home_dir.join("Library/Application Support/lc/embeddings"))
70 }
71
72 pub fn list_databases() -> Result<Vec<String>> {
73 let embeddings_dir = Self::embeddings_dir()?;
74 Self::list_databases_in_dir(&embeddings_dir)
75 }
76
77 pub fn list_databases_in_dir(embeddings_dir: &std::path::Path) -> Result<Vec<String>> {
78 if !embeddings_dir.exists() {
79 return Ok(Vec::new());
80 }
81
82 let mut databases = Vec::new();
83
84 for entry in fs::read_dir(embeddings_dir)? {
85 let entry = entry?;
86 let path = entry.path();
87
88 if path.is_file() {
89 if let Some(extension) = path.extension() {
90 if extension == "db" {
91 if let Some(name) = path.file_stem().and_then(|s| s.to_str()) {
92 databases.push(name.to_string());
93 }
94 }
95 }
96 }
97 }
98
99 databases.sort();
100 Ok(databases)
101 }
102
103 pub fn delete_database(name: &str) -> Result<()> {
104 let embeddings_dir = Self::embeddings_dir()?;
105 Self::delete_database_in_dir(name, &embeddings_dir)
106 }
107
108 pub fn delete_database_in_dir(name: &str, embeddings_dir: &std::path::Path) -> Result<()> {
109 let db_path = embeddings_dir.join(format!("{}.db", name));
110
111 if db_path.exists() {
112 fs::remove_file(db_path)?;
113 }
114
115 Ok(())
116 }
117
118 fn initialize(&self) -> Result<()> {
119 let conn = Connection::open(&self.db_path)?;
120
121 conn.execute(
123 "CREATE TABLE IF NOT EXISTS vectors (
124 id INTEGER PRIMARY KEY AUTOINCREMENT,
125 text TEXT NOT NULL,
126 vector BLOB NOT NULL,
127 model TEXT NOT NULL,
128 provider TEXT NOT NULL,
129 created_at TEXT NOT NULL
130 )",
131 [],
132 )?;
133
134 let mut has_file_path = false;
136 let mut has_chunk_index = false;
137 let mut has_total_chunks = false;
138
139 let mut stmt = conn.prepare("PRAGMA table_info(vectors)")?;
141 let column_iter = stmt.query_map([], |row| {
142 let column_name: String = row.get(1)?;
143 Ok(column_name)
144 })?;
145
146 for column_result in column_iter {
147 let column_name = column_result?;
148 match column_name.as_str() {
149 "file_path" => has_file_path = true,
150 "chunk_index" => has_chunk_index = true,
151 "total_chunks" => has_total_chunks = true,
152 _ => {}
153 }
154 }
155
156 if !has_file_path {
158 conn.execute("ALTER TABLE vectors ADD COLUMN file_path TEXT", [])?;
159 }
160 if !has_chunk_index {
161 conn.execute("ALTER TABLE vectors ADD COLUMN chunk_index INTEGER", [])?;
162 }
163 if !has_total_chunks {
164 conn.execute("ALTER TABLE vectors ADD COLUMN total_chunks INTEGER", [])?;
165 }
166
167 conn.execute(
169 "CREATE INDEX IF NOT EXISTS idx_model_provider ON vectors(model, provider)",
170 [],
171 )?;
172
173 conn.execute(
175 "CREATE INDEX IF NOT EXISTS idx_file_path ON vectors(file_path)",
176 [],
177 )?;
178
179 Ok(())
180 }
181
182 pub fn add_vector(
183 &self,
184 text: &str,
185 vector: &[f64],
186 model: &str,
187 provider: &str,
188 ) -> Result<i64> {
189 self.add_vector_with_metadata(text, vector, model, provider, None, None, None)
190 }
191
192 pub fn add_vector_with_metadata(
193 &self,
194 text: &str,
195 vector: &[f64],
196 model: &str,
197 provider: &str,
198 file_path: Option<&str>,
199 chunk_index: Option<i32>,
200 total_chunks: Option<i32>,
201 ) -> Result<i64> {
202 let conn = Connection::open(&self.db_path)?;
203
204 let vector_json = serde_json::to_string(vector)?;
206 let created_at = chrono::Utc::now().to_rfc3339();
207
208 conn.execute(
209 "INSERT INTO vectors (text, vector, model, provider, created_at, file_path, chunk_index, total_chunks) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
210 params![text, vector_json, model, provider, created_at, file_path, chunk_index, total_chunks],
211 )?;
212
213 let id = conn.last_insert_rowid();
214
215 let vector_entry = VectorEntry {
217 id,
218 text: text.to_string(),
219 vector: vector.to_vec(),
220 model: model.to_string(),
221 provider: provider.to_string(),
222 created_at: chrono::Utc::now(),
223 file_path: file_path.map(|s| s.to_string()),
224 chunk_index,
225 total_chunks,
226 };
227
228 self.vector_cache.insert(id, vector_entry);
230
231 *self.index_dirty.write() = true;
233
234 Ok(id)
235 }
236
237 pub fn get_all_vectors(&self) -> Result<Vec<VectorEntry>> {
238 let conn = Connection::open(&self.db_path)?;
239
240 let mut stmt = conn.prepare(
241 "SELECT id, text, vector, model, provider, created_at, file_path, chunk_index, total_chunks FROM vectors ORDER BY created_at DESC"
242 )?;
243
244 let vector_iter = stmt.query_map([], |row| {
245 let vector_json: String = row.get(2)?;
246 let vector: Vec<f64> = serde_json::from_str(&vector_json).map_err(|_e| {
247 rusqlite::Error::InvalidColumnType(
248 2,
249 "vector".to_string(),
250 rusqlite::types::Type::Text,
251 )
252 })?;
253
254 let created_at_str: String = row.get(5)?;
255 let created_at = chrono::DateTime::parse_from_rfc3339(&created_at_str)
256 .map_err(|_| {
257 rusqlite::Error::InvalidColumnType(
258 5,
259 "created_at".to_string(),
260 rusqlite::types::Type::Text,
261 )
262 })?
263 .with_timezone(&chrono::Utc);
264
265 Ok(VectorEntry {
266 id: row.get(0)?,
267 text: row.get(1)?,
268 vector,
269 model: row.get(3)?,
270 provider: row.get(4)?,
271 created_at,
272 file_path: row.get(6).ok(),
273 chunk_index: row.get(7).ok(),
274 total_chunks: row.get(8).ok(),
275 })
276 })?;
277
278 let mut vectors = Vec::new();
279 for vector in vector_iter {
280 vectors.push(vector?);
281 }
282
283 Ok(vectors)
284 }
285
286 pub fn get_model_info(&self) -> Result<Option<(String, String)>> {
287 let conn = Connection::open(&self.db_path)?;
288
289 let mut stmt = conn.prepare("SELECT model, provider FROM vectors LIMIT 1")?;
290
291 let mut rows = stmt.query_map([], |row| {
292 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
293 })?;
294
295 if let Some(row) = rows.next() {
296 Ok(Some(row?))
297 } else {
298 Ok(None)
299 }
300 }
301
302 pub fn find_similar(
303 &self,
304 query_vector: &[f64],
305 limit: usize,
306 ) -> Result<Vec<(VectorEntry, f64)>> {
307 self.ensure_index_built()?;
309
310 if let Some(index) = self.hnsw_index.read().as_ref() {
312 if !self.vector_cache.is_empty() {
314 let first_entry = self.vector_cache.iter().next();
315 if let Some(entry) = first_entry {
316 let stored_dimension = entry.vector.len();
317 if query_vector.len() != stored_dimension {
318 crate::debug_log!("Dimension mismatch: query={}, stored={}, falling back to linear search",
319 query_vector.len(), stored_dimension);
320 return self.find_similar_linear_optimized(query_vector, limit);
321 }
322 }
323 }
324
325 let hnsw_limit = std::cmp::min(limit * 2, self.vector_cache.len());
327 let search_results = index.search(query_vector, hnsw_limit, 50); let mut results = Vec::with_capacity(limit);
330 for neighbor in search_results {
331 if let Some(entry) = self.vector_cache.get(&(neighbor.d_id as i64)) {
332 let similarity = 1.0 - neighbor.distance as f64;
334 results.push((entry.value().clone(), similarity));
335
336 if results.len() >= limit {
338 break;
339 }
340 }
341 }
342
343 if results.len() < limit && results.len() < self.vector_cache.len() {
345 crate::debug_log!(
346 "HNSW returned only {} results, falling back to linear search",
347 results.len()
348 );
349 return self.find_similar_linear_optimized(query_vector, limit);
350 }
351
352 return Ok(results);
353 }
354
355 self.find_similar_linear_optimized(query_vector, limit)
357 }
358
359 fn find_similar_linear_optimized(
361 &self,
362 query_vector: &[f64],
363 limit: usize,
364 ) -> Result<Vec<(VectorEntry, f64)>> {
365 let vectors = if self.vector_cache.is_empty() {
367 self.get_all_vectors()?
368 } else {
369 self.vector_cache
370 .iter()
371 .map(|entry| entry.value().clone())
372 .collect::<Vec<_>>()
373 };
374
375 let mut similarities: Vec<(VectorEntry, f64)> = vectors
377 .into_par_iter()
378 .map(|vector_entry| {
379 let similarity = cosine_similarity_simd(query_vector, &vector_entry.vector);
380 (vector_entry, similarity)
381 })
382 .collect();
383
384 if limit < similarities.len() {
386 similarities.select_nth_unstable_by(limit, |a, b| {
387 b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal)
388 });
389 similarities[..limit]
390 .sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
391 similarities.truncate(limit);
392 } else {
393 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
394 }
395
396 Ok(similarities)
397 }
398
399 fn ensure_index_built(&self) -> Result<()> {
401 let index_dirty = *self.index_dirty.read();
402
403 if index_dirty || self.hnsw_index.read().is_none() {
404 self.rebuild_index()?;
405 }
406
407 Ok(())
408 }
409
410 fn rebuild_index(&self) -> Result<()> {
412 crate::debug_log!("Rebuilding HNSW index...");
413
414 if self.vector_cache.is_empty() {
416 let vectors = self.get_all_vectors()?;
417 for vector in vectors {
418 self.vector_cache.insert(vector.id, vector);
419 }
420 }
421
422 if self.vector_cache.is_empty() {
423 return Ok(());
424 }
425
426 let first_entry = self.vector_cache.iter().next();
428 if let Some(entry) = first_entry {
429 let dimension = entry.vector.len();
430
431 let hnsw = Hnsw::new(16, dimension, 200, 200, DistCosine {});
433
434 for entry in self.vector_cache.iter() {
436 let vector_entry = entry.value();
437 hnsw.insert((&vector_entry.vector, vector_entry.id as usize));
438 }
439
440 *self.hnsw_index.write() = Some(hnsw);
442 *self.index_dirty.write() = false;
443
444 crate::debug_log!(
445 "HNSW index rebuilt with {} vectors",
446 self.vector_cache.len()
447 );
448 }
449
450 Ok(())
451 }
452
453 pub fn count(&self) -> Result<usize> {
454 let conn = Connection::open(&self.db_path)?;
455
456 let count: i64 = conn.query_row("SELECT COUNT(*) FROM vectors", [], |row| row.get(0))?;
457
458 Ok(count as usize)
459 }
460}
461
462pub fn cosine_similarity_simd(a: &[f64], b: &[f64]) -> f64 {
464 if a.len() != b.len() {
465 crate::debug_log!(
466 "Vector dimension mismatch: query={}, stored={}",
467 a.len(),
468 b.len()
469 );
470 return 0.0;
471 }
472
473 if a.is_empty() {
474 return 0.0;
475 }
476
477 let mut dot_product = 0.0f64;
479 let mut norm_a_sq = 0.0f64;
480 let mut norm_b_sq = 0.0f64;
481
482 let chunk_size = 4;
484 let chunks = a.len() / chunk_size;
485
486 for i in 0..chunks {
487 let start = i * chunk_size;
488 let end = start + chunk_size;
489
490 for j in start..end {
491 let av = a[j];
492 let bv = b[j];
493 dot_product += av * bv;
494 norm_a_sq += av * av;
495 norm_b_sq += bv * bv;
496 }
497 }
498
499 for i in (chunks * chunk_size)..a.len() {
501 let av = a[i];
502 let bv = b[i];
503 dot_product += av * bv;
504 norm_a_sq += av * av;
505 norm_b_sq += bv * bv;
506 }
507
508 let norm_a = norm_a_sq.sqrt();
509 let norm_b = norm_b_sq.sqrt();
510
511 if norm_a == 0.0 || norm_b == 0.0 {
512 return 0.0;
513 }
514
515 dot_product / (norm_a * norm_b)
516}
517
/// Stateless helpers for discovering, reading, and chunking text files.
pub struct FileProcessor;
520
521impl FileProcessor {
522 pub fn is_text_file(path: &std::path::Path) -> bool {
524 if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
526 let ext = ext.to_lowercase();
527 match ext.as_str() {
528 "txt" | "md" | "markdown" | "rst" | "org" | "tex" | "rtf" => true,
530 "rs" | "py" | "js" | "ts" | "java" | "cpp" | "c" | "h" | "hpp" | "go" | "rb"
532 | "php" | "swift" | "kt" | "scala" | "sh" | "bash" | "zsh" | "fish" | "ps1"
533 | "bat" | "cmd" | "html" | "css" | "scss" | "sass" | "less" | "xml" | "json"
534 | "yaml" | "yml" | "toml" | "ini" | "cfg" | "conf" | "sql" | "r" | "m" | "mm"
535 | "pl" | "pm" | "lua" | "vim" | "dockerfile" | "makefile" | "cmake" | "gradle" => {
536 true
537 }
538 "log" | "out" | "err" => true,
540 "exe" | "dll" | "so" | "dylib" | "bin" | "obj" | "o" | "a" | "lib" | "zip"
542 | "tar" | "gz" | "bz2" | "xz" | "7z" | "rar" | "pdf" | "doc" | "docx" | "xls"
543 | "xlsx" | "ppt" | "pptx" | "jpg" | "jpeg" | "png" | "gif" | "bmp" | "tiff"
544 | "svg" | "ico" | "mp3" | "mp4" | "avi" | "mov" | "wmv" | "flv" | "mkv" | "wav"
545 | "flac" | "ogg" => false,
546 _ => {
547 path.file_name()
549 .and_then(|name| name.to_str())
550 .map(|name| !name.contains('.'))
551 .unwrap_or(false)
552 }
553 }
554 } else {
555 Self::is_text_content(path).unwrap_or(false)
557 }
558 }
559
560 fn is_text_content(path: &std::path::Path) -> Result<bool> {
562 use std::fs::File;
563 use std::io::Read;
564
565 let mut file = File::open(path)?;
566 let mut buffer = [0; 512]; let bytes_read = file.read(&mut buffer)?;
568
569 if bytes_read == 0 {
570 return Ok(true); }
572
573 let null_count = buffer[..bytes_read].iter().filter(|&&b| b == 0).count();
575 if null_count > 0 {
576 return Ok(false);
577 }
578
579 let printable_count = buffer[..bytes_read]
581 .iter()
582 .filter(|&&b| b >= 32 && b <= 126 || b == 9 || b == 10 || b == 13)
583 .count();
584
585 let printable_ratio = printable_count as f64 / bytes_read as f64;
586 Ok(printable_ratio > 0.7) }
588
589 pub fn expand_file_patterns(patterns: &[String]) -> Result<Vec<std::path::PathBuf>> {
591 use glob::glob;
592
593 let mut files = Vec::new();
594
595 for pattern in patterns {
596 crate::debug_log!("Processing file pattern: {}", pattern);
597
598 match glob(pattern) {
599 Ok(paths) => {
600 for path_result in paths {
601 match path_result {
602 Ok(path) => {
603 if path.is_file() && Self::is_text_file(&path) {
604 crate::debug_log!("Adding text file: {}", path.display());
605 files.push(path);
606 } else if path.is_file() {
607 crate::debug_log!("Skipping non-text file: {}", path.display());
608 } else {
609 crate::debug_log!("Skipping non-file: {}", path.display());
610 }
611 }
612 Err(e) => {
613 eprintln!(
614 "Warning: Error processing path in pattern '{}': {}",
615 pattern, e
616 );
617 }
618 }
619 }
620 }
621 Err(e) => {
622 eprintln!("Warning: Invalid glob pattern '{}': {}", pattern, e);
623 }
624 }
625 }
626
627 files.sort();
628 files.dedup();
629 Ok(files)
630 }
631
632 pub fn chunk_text(text: &str, chunk_size: usize, overlap: usize) -> Vec<String> {
634 crate::debug_log!(
635 "Chunking text: {} chars, chunk_size: {}, overlap: {}",
636 text.len(),
637 chunk_size,
638 overlap
639 );
640
641 if text.len() <= chunk_size {
642 crate::debug_log!("Text is smaller than chunk size, returning single chunk");
643 return vec![text.to_string()];
644 }
645
646 let mut chunks = Vec::new();
647 let mut start = 0;
648 let mut iteration = 0;
649
650 while start < text.len() {
651 iteration += 1;
652 crate::debug_log!(
653 "Chunk iteration {}: start={}, text.len()={}",
654 iteration,
655 start,
656 text.len()
657 );
658
659 let end = std::cmp::min(start + chunk_size, text.len());
660 let mut chunk_end = end;
661
662 if end < text.len() {
664 if let Some(sentence_end) = text[start..end].rfind(". ") {
665 chunk_end = start + sentence_end + 1;
666 } else if let Some(para_end) = text[start..end].rfind("\n\n") {
667 chunk_end = start + para_end + 1;
668 } else if let Some(line_end) = text[start..end].rfind('\n') {
669 chunk_end = start + line_end + 1;
670 }
671 }
672
673 let chunk = text[start..chunk_end].trim().to_string();
674 if !chunk.is_empty() {
675 let chunk_len = chunk.len();
676 chunks.push(chunk);
677 crate::debug_log!("Added chunk {}: {} chars", chunks.len(), chunk_len);
678 }
679
680 if chunk_end >= text.len() {
682 crate::debug_log!("Reached end of text, breaking");
683 break;
684 }
685
686 let new_start = if chunk_end > overlap {
687 chunk_end - overlap
688 } else {
689 chunk_end
690 };
691
692 if new_start <= start {
695 start = start + 1;
696 crate::debug_log!(
697 "Preventing infinite loop: moving start from {} to {}",
698 new_start,
699 start
700 );
701 } else {
702 start = new_start;
703 }
704
705 crate::debug_log!("Next start position: {}", start);
706
707 if iteration > 1000 {
709 crate::debug_log!(
710 "WARNING: Too many iterations, breaking to prevent infinite loop"
711 );
712 break;
713 }
714 }
715
716 crate::debug_log!("Chunking complete: {} chunks created", chunks.len());
717 chunks
718 }
719
720 pub fn process_file(path: &std::path::Path) -> Result<Vec<String>> {
722 if let Ok(handle) = tokio::runtime::Handle::try_current() {
724 handle.block_on(Self::process_file_async(path))
725 } else {
726 crate::debug_log!("Reading file synchronously: {}", path.display());
728 let content = std::fs::read_to_string(path)?;
729 crate::debug_log!("File content length: {} characters", content.len());
730
731 crate::debug_log!("Starting text chunking with 1200 char chunks, 200 char overlap");
733 let chunks = Self::chunk_text(&content, 1200, 200);
734
735 crate::debug_log!(
736 "File '{}' split into {} chunks",
737 path.display(),
738 chunks.len()
739 );
740
741 Ok(chunks)
742 }
743 }
744
745 pub async fn process_file_async(path: &std::path::Path) -> Result<Vec<String>> {
747 crate::debug_log!("Reading file: {}", path.display());
748
749 let content = Self::read_file_optimized(path).await?;
750 crate::debug_log!("File content length: {} characters", content.len());
751
752 crate::debug_log!("Starting text chunking with 1200 char chunks, 200 char overlap");
754 let chunks = Self::chunk_text(&content, 1200, 200);
755
756 crate::debug_log!(
757 "File '{}' split into {} chunks",
758 path.display(),
759 chunks.len()
760 );
761
762 Ok(chunks)
763 }
764
765 async fn read_file_optimized(path: &std::path::Path) -> Result<String> {
767 let metadata = tokio::fs::metadata(path).await?;
768 let file_size = metadata.len();
769
770 if file_size > 1_048_576 {
772 crate::debug_log!("Using memory mapping for large file: {} bytes", file_size);
773
774 let file = std::fs::File::open(path)?;
775 let mmap = unsafe { memmap2::Mmap::map(&file)? };
776
777 let content = tokio::task::spawn_blocking(move || {
779 std::str::from_utf8(&mmap)
780 .map_err(|e| anyhow::anyhow!("Invalid UTF-8 in file: {}", e))
781 .map(|s| s.to_string())
782 })
783 .await??;
784
785 Ok(content)
786 } else {
787 crate::debug_log!(
789 "Using async file reading for small file: {} bytes",
790 file_size
791 );
792 Ok(tokio::fs::read_to_string(path).await?)
793 }
794 }
795}
796
#[cfg(test)]
mod tests {
    use super::*;

    // Identical vectors score 1; orthogonal vectors score 0.
    #[test]
    fn test_cosine_similarity() {
        let v = vec![1.0, 2.0, 3.0];
        assert!((cosine_similarity_simd(&v, &v) - 1.0).abs() < 1e-10);

        let x_axis = vec![1.0, 0.0];
        let y_axis = vec![0.0, 1.0];
        assert!(cosine_similarity_simd(&x_axis, &y_axis).abs() < 1e-10);
    }

    // A multi-sentence text larger than the chunk size must be split, and the
    // first chunk keeps the opening sentence.
    #[test]
    fn test_chunk_text() {
        let text = "This is sentence one. This is sentence two. This is sentence three.";
        let chunks = FileProcessor::chunk_text(text, 30, 10);

        assert!(chunks.len() > 1);
        assert!(chunks[0].contains("sentence one"));
    }

    // Extension-based classification: known text extensions accepted, known
    // binary extensions rejected.
    #[test]
    fn test_is_text_file() {
        use std::path::Path;

        for name in &["test.txt", "test.rs", "test.py"] {
            assert!(FileProcessor::is_text_file(Path::new(name)));
        }
        for name in &["test.exe", "test.jpg"] {
            assert!(!FileProcessor::is_text_file(Path::new(name)));
        }
    }
}
831}