//! Incremental file processing with atomic updates and transaction integrity.
//!
//! This module provides the core processor for incremental indexing, handling three types
//! of file changes (new, modified, deleted) with full transaction safety and edge consistency.
//!
//! # Architecture
//!
//! The processor coordinates:
//! - File parsing (via existing ParserFactory)
//! - Chunk database updates (atomic transactions)
//! - Edge relationship maintenance (via EdgeUpdater)
//!
//! # Transaction Flow
//!
//! For modified files:
//! ```sql
//! BEGIN;
//!   DELETE FROM chunks WHERE file_id = ?1;
//!   INSERT INTO chunks (...) VALUES (...);
//!   UPDATE files SET blake3_hash = ?1, last_modified = CURRENT_TIMESTAMP WHERE id = ?2;
//!   DELETE FROM chunk_edges WHERE src_chunk_id IN (...) OR dst_chunk_id IN (...);
//!   INSERT INTO chunk_edges (...) VALUES (...);
//! COMMIT;
//! ```
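//!
//! A minimal sketch of how the SQLite port might wrap these steps, assuming a
//! `rusqlite::Connection` and a hypothetical `replace_file_chunks` helper (the
//! crate's actual store API may differ):
//!
//! ```ignore
//! use rusqlite::{params, Connection};
//!
//! fn replace_file_chunks(conn: &mut Connection, file_id: i64, new_hash: &str) -> rusqlite::Result<()> {
//!     let tx = conn.transaction()?; // BEGIN
//!     tx.execute("DELETE FROM chunks WHERE file_id = ?1", params![file_id])?;
//!     // ... insert the re-parsed chunks and refreshed edges here ...
//!     tx.execute(
//!         "UPDATE files SET blake3_hash = ?1, last_modified = CURRENT_TIMESTAMP WHERE id = ?2",
//!         params![new_hash, file_id],
//!     )?;
//!     tx.commit() // COMMIT; dropping `tx` without committing rolls back
//! }
//! ```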
//!
//! # Performance Target
//!
//! - File updates complete in <5s for typical files
//! - Automatic rollback on any error (prevents corruption)
//! - Batch operations within transactions for efficiency

use anyhow::{Context, Result};
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tracing::{debug, info, warn};

use crate::db::traits::{StoreChunks, StoreCore};
use crate::db::SqliteStore;
use crate::indexer::SymbolChunk;

use super::detector::ChangeType;
use super::edge_updater::EdgeUpdater;
use super::hash::ContentHash;
use super::path_utils::normalize_to_relpath;
use super::task::UpdateTask;

/// Maximum file size (in bytes) to index. Files larger than this are skipped.
///
/// Set to 10MB to prevent DoS attacks via very large files (e.g., 1GB file causing OOM).
/// Source code files are typically < 1MB. This limit accommodates large autogenerated files,
/// package-lock.json, etc., while protecting against accidental indexing of binary artifacts.
///
/// Adjust this constant if your repository requires indexing larger files.
const MAX_FILE_SIZE_BYTES: u64 = 10 * 1024 * 1024; // 10MB

/// Incremental processor for atomic file updates.
///
/// Processes individual file changes from the update queue with full
/// transaction safety. Each file operation is atomic - either all changes
/// succeed or all are rolled back.
///
/// # Example
///
/// ```ignore
/// use std::path::PathBuf;
/// use std::sync::Arc;
/// use maproom::db::SqliteStore;
/// use maproom::incremental::{IncrementalProcessor, UpdateTask, ChangeType, FileHasher, Trigger};
///
/// #[tokio::main]
/// async fn main() -> anyhow::Result<()> {
///     // Open your SqliteStore however your application does; elided here.
///     let store: Arc<SqliteStore> = todo!();
///     let repo_root = PathBuf::from("/workspace");
///     let processor = IncrementalProcessor::new(store, repo_root, 1, 1, 1);
///
///     // Process a file update
///     let path = PathBuf::from("/workspace/src/main.rs");
///     let old_hash = FileHasher::hash_bytes(b"old content");
///     let new_hash = FileHasher::hash_bytes(b"new content");
///     let task = UpdateTask::new(
///         path,
///         ChangeType::Modified { old: old_hash, new: new_hash },
///         Trigger::Save
///     );
///
///     processor.process(task).await?;
///     Ok(())
/// }
/// ```
pub struct IncrementalProcessor {
    store: Arc<SqliteStore>,
    edge_updater: EdgeUpdater,
    repo_root: PathBuf,
    repo_id: i64,
    worktree_id: i64,
    commit_id: i64,
}

impl IncrementalProcessor {
    /// Create a new incremental processor.
    ///
    /// # Arguments
    /// * `store` - SqliteStore instance
    /// * `repo_root` - Absolute path to the repository root (used for path normalization)
    /// * `repo_id` - Database ID of the repository
    /// * `worktree_id` - Database ID of the worktree
    /// * `commit_id` - Database ID of the commit
    ///
    /// # Returns
    /// A new processor ready to handle file updates
    pub fn new(
        store: Arc<SqliteStore>,
        repo_root: PathBuf,
        repo_id: i64,
        worktree_id: i64,
        commit_id: i64,
    ) -> Self {
        Self {
            edge_updater: EdgeUpdater::new(store.clone()),
            store,
            repo_root,
            repo_id,
            worktree_id,
            commit_id,
        }
    }

    /// Process a single update task.
    ///
    /// Handles the task based on its change type:
    /// - New: Parse and insert file chunks
    /// - Modified: Delete old chunks, insert new ones, update file record
    /// - Deleted: Remove all chunks and edges
    /// - None: Skip (no changes needed)
    ///
    /// All operations are designed to run inside a single transaction for
    /// atomicity; the SQLite transaction wrapping is still pending (see the
    /// notes in each handler).
    ///
    /// # Arguments
    /// * `task` - The update task to process
    ///
    /// # Returns
    /// * `Ok(())` - Task processed successfully
    /// * `Err(_)` - Processing failed (transactional rollback pending the SQLite port)
    ///
    /// # Performance
    ///
    /// Typical processing times:
    /// - New file: 100-500ms (parse + insert)
    /// - Modified file: 200-800ms (delete + parse + insert + edges)
    /// - Deleted file: 50-200ms (delete chunks + edges)
    ///
    /// # Example
    ///
    /// ```ignore
    /// # use std::path::PathBuf;
    /// # use std::sync::Arc;
    /// # use maproom::db::SqliteStore;
    /// # use maproom::incremental::{IncrementalProcessor, UpdateTask, ChangeType, FileHasher, Trigger};
    /// # #[tokio::main]
    /// # async fn main() -> anyhow::Result<()> {
    /// # let store: Arc<SqliteStore> = todo!();
    /// let repo_root = PathBuf::from("/workspace");
    /// let processor = IncrementalProcessor::new(store, repo_root, 1, 1, 1);
    /// let task = UpdateTask::new(
    ///     PathBuf::from("/workspace/src/lib.rs"),
    ///     ChangeType::New(FileHasher::hash_bytes(b"content")),
    ///     Trigger::Auto
    /// );
    ///
    /// processor.process(task).await?;
    /// # Ok(())
    /// # }
    /// ```
    pub async fn process(&self, task: UpdateTask) -> Result<()> {
        let path_display = task.path.display().to_string();

        debug!(
            path = %path_display,
            change_type = ?task.change_type,
            priority = ?task.priority,
            "Processing update task"
        );

        match &task.change_type {
            ChangeType::New(hash) => {
                self.index_new_file(&task.path, hash)
                    .await
                    .with_context(|| format!("Failed to index new file: {}", path_display))?;
                info!(path = %path_display, "Indexed new file");
            }
            ChangeType::Modified { old: _, new } => {
                self.update_file(&task.path, new)
                    .await
                    .with_context(|| format!("Failed to update modified file: {}", path_display))?;
                info!(path = %path_display, "Updated modified file");
            }
            ChangeType::Deleted(_) => {
                self.remove_file(&task.path)
                    .await
                    .with_context(|| format!("Failed to remove deleted file: {}", path_display))?;
                info!(path = %path_display, "Removed deleted file");
            }
            ChangeType::None => {
                debug!(path = %path_display, "No change detected, skipping");
                return Ok(());
            }
        }

        Ok(())
    }

    /// Index a new file by parsing and inserting its chunks.
    ///
    /// # Transaction Flow
    /// 1. Look up or create file record in database
    /// 2. Parse file to extract chunks
    /// 3. Insert chunks via the store (one call per chunk; transactional wrapping pending)
    /// 4. Update edges for new chunks
    ///
    /// # Arguments
    /// * `path` - Absolute filesystem path to the new file
    /// * `hash` - Content hash of the file
    ///
    /// # Returns
    /// * `Ok(())` - File indexed successfully
    /// * `Err(_)` - Indexing failed (e.g., parse error, DB error)
    async fn index_new_file(&self, path: &Path, hash: &ContentHash) -> Result<()> {
        // Note: This function previously used PostgreSQL transactions.
        // SQLite implementation will be added in future tickets.

        // Check file size BEFORE reading to prevent OOM on very large files
        let metadata = fs::metadata(path)
            .with_context(|| format!("Failed to get file metadata: {}", path.display()))?;

        if metadata.len() > MAX_FILE_SIZE_BYTES {
            warn!(
                path = %path.display(),
                size_mb = metadata.len() / (1024 * 1024),
                limit_mb = MAX_FILE_SIZE_BYTES / (1024 * 1024),
                "File too large to index, skipping"
            );
            return Ok(()); // Skip gracefully, don't error
        }

        // Detect symlinks for security awareness (log but allow indexing)
        // Use symlink_metadata to check the link itself, not the target
        let symlink_metadata = fs::symlink_metadata(path)
            .with_context(|| format!("Failed to get symlink metadata: {}", path.display()))?;

        if symlink_metadata.file_type().is_symlink() {
            warn!(
                path = %path.display(),
                "Indexing symlink - resolved path may be outside repository"
            );
            // Continue processing - just log awareness
        }

        // CRITICAL: Read file content using absolute path (filesystem operation)
        let content = fs::read_to_string(path)
            .with_context(|| format!("Failed to read file: {}", path.display()))?;

        // Detect language from file extension
        let language = detect_language_from_path(path);

        // CRITICAL: Normalize path for database query (database stores relative paths)
        // Absolute path example: "/workspace/packages/cli/src/main.ts"
        // Relative path example: "packages/cli/src/main.ts"
        let relpath = normalize_to_relpath(path, &self.repo_root)
            .with_context(|| format!("Failed to normalize path: {}", path.display()))?;

        let relpath_str = relpath
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("Invalid UTF-8 in path: {}", relpath.display()))?;

        // 1. Create file record
        let file_record = crate::db::FileRecord {
            repo_id: self.repo_id,
            worktree_id: self.worktree_id,
            commit_id: self.commit_id,
            relpath: relpath_str.to_string(),
            language: language.map(|s| s.to_string()),
            content_hash: hash.to_hex().to_string(),
            size_bytes: metadata.len() as i32,
            last_modified: Some(chrono::Utc::now()),
        };

        let file_id = self
            .store
            .upsert_file(&file_record)
            .await
            .with_context(|| format!("Failed to upsert file record: {}", path.display()))?;

        // 2. Parse file to extract chunks
        let lang_str = language.unwrap_or("unknown");
        let symbol_chunks = parse_file_chunks(&content, lang_str)
            .with_context(|| format!("Failed to parse file: {}", path.display()))?;

        // 3. Create chunk records and insert them
        let mut chunk_ids = Vec::new();
        for chunk in &symbol_chunks {
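            // start_line/end_line are 1-based and inclusive; skip()/take()
            // convert the range to the 0-based lines() iterator.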
            let preview = content
                .lines()
                .skip((chunk.start_line - 1) as usize)
                .take((chunk.end_line - chunk.start_line + 1) as usize)
                .collect::<Vec<_>>()
                .join("\n");

            let ts_doc_text = build_ts_doc(
                chunk.symbol_name.as_deref(),
                chunk.signature.as_deref(),
                chunk.docstring.as_deref(),
                &preview,
            );

            // Compute blob_sha for this chunk's content
            let chunk_content = &preview;
            let blob_sha = super::hash::FileHasher::hash_bytes(chunk_content.as_bytes())
                .to_hex()
                .to_string();

            let chunk_record = crate::db::ChunkRecord {
                file_id,
                blob_sha,
                symbol_name: chunk.symbol_name.clone(),
                kind: chunk.kind.clone(),
                signature: chunk.signature.clone(),
                docstring: chunk.docstring.clone(),
                start_line: chunk.start_line,
                end_line: chunk.end_line,
                preview,
                ts_doc_text,
                recency_score: 1.0, // New file = max recency
                churn_score: 0.0,   // New file = no churn
                metadata: chunk.metadata.clone(),
                worktree_id: self.worktree_id,
            };

            let chunk_id = self
                .store
                .insert_chunk(&chunk_record)
                .await
                .with_context(|| format!("Failed to insert chunk for file: {}", path.display()))?;
            chunk_ids.push(chunk_id);
        }

        // 4. Update edges for new chunks (delegate to EdgeUpdater)
        // Note: EdgeUpdater.update_edges takes file_id, not chunk_ids
        // Edge computation is done by file for consistency
        self.edge_updater
            .update_edges(file_id)
            .await
            .with_context(|| format!("Failed to update edges for file: {}", path.display()))?;

        debug!(
            path = %path.display(),
            file_id = file_id,
            chunks = chunk_ids.len(),
            "Indexed new file"
        );

        Ok(())
    }

    /// Update an existing file by replacing its chunks.
    ///
    /// # Transaction Flow
    /// 1. Look up the file record by relative path (falls back to indexing as new if absent)
    /// 2. Delete all existing chunks for the file
    /// 3. Parse the file and insert new chunks
    /// 4. Update edges for the new chunks
    ///
    /// Transactional wrapping, and persisting `new_hash` to the file record,
    /// are pending the SQLite port; see the note in the body.
    ///
    /// # Arguments
    /// * `path` - Absolute filesystem path to the modified file
    /// * `new_hash` - New content hash of the file
    ///
    /// # Returns
    /// * `Ok(())` - File updated successfully
    /// * `Err(_)` - Update failed (transactional rollback pending the SQLite port)
    async fn update_file(&self, path: &Path, new_hash: &ContentHash) -> Result<()> {
        // Note: This function previously used PostgreSQL transactions.
        // SQLite implementation will be added in future tickets.

        // Check file size BEFORE reading to prevent OOM on very large files
        let metadata = fs::metadata(path)
            .with_context(|| format!("Failed to get file metadata: {}", path.display()))?;

        if metadata.len() > MAX_FILE_SIZE_BYTES {
            warn!(
                path = %path.display(),
                size_mb = metadata.len() / (1024 * 1024),
                limit_mb = MAX_FILE_SIZE_BYTES / (1024 * 1024),
                "File too large to index, skipping"
            );
            return Ok(()); // Skip gracefully, don't error
        }

        // Detect symlinks for security awareness (log but allow indexing)
        // Use symlink_metadata to check the link itself, not the target
        let symlink_metadata = fs::symlink_metadata(path)
            .with_context(|| format!("Failed to get symlink metadata: {}", path.display()))?;

        if symlink_metadata.file_type().is_symlink() {
            warn!(
                path = %path.display(),
                "Indexing symlink - resolved path may be outside repository"
            );
            // Continue processing - just log awareness
        }

        // CRITICAL: Read file content using absolute path (filesystem operation)
        let content = fs::read_to_string(path)
            .with_context(|| format!("Failed to read file: {}", path.display()))?;

        // Detect language from file extension
        let language = detect_language_from_path(path);

        // CRITICAL: Normalize path for database query (database stores relative paths)
        // Absolute path example: "/workspace/packages/cli/src/main.ts"
        // Relative path example: "packages/cli/src/main.ts"
        let relpath = normalize_to_relpath(path, &self.repo_root)
            .with_context(|| format!("Failed to normalize path: {}", path.display()))?;

        let relpath_str = relpath
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("Invalid UTF-8 in path: {}", relpath.display()))?;

        // 1. Look up existing file by relpath
        let file_id = self
            .store
            .get_file_id_by_relpath(relpath_str, self.worktree_id)
            .await
            .with_context(|| format!("Failed to look up file: {}", path.display()))?;

        let file_id = match file_id {
            Some(id) => id,
            None => {
                // File doesn't exist in DB yet - treat as new file
                debug!(path = %path.display(), "File not found in DB, treating as new");
                return self.index_new_file(path, new_hash).await;
            }
        };

        // 2. Delete old chunks (this also cleans up edges and embeddings)
        let chunks_deleted = self
            .store
            .delete_chunks_by_file(file_id)
            .await
            .with_context(|| format!("Failed to delete old chunks for file: {}", path.display()))?;

        debug!(path = %path.display(), chunks_deleted = chunks_deleted, "Deleted old chunks");

        // 3. Parse file to extract new chunks
        let lang_str = language.unwrap_or("unknown");
        let symbol_chunks = parse_file_chunks(&content, lang_str)
            .with_context(|| format!("Failed to parse file: {}", path.display()))?;

        // 4. Insert new chunks
        let mut chunk_ids = Vec::new();
        for chunk in &symbol_chunks {
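            // start_line/end_line are 1-based and inclusive; skip()/take()
            // convert the range to the 0-based lines() iterator.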
            let preview = content
                .lines()
                .skip((chunk.start_line - 1) as usize)
                .take((chunk.end_line - chunk.start_line + 1) as usize)
                .collect::<Vec<_>>()
                .join("\n");

            let ts_doc_text = build_ts_doc(
                chunk.symbol_name.as_deref(),
                chunk.signature.as_deref(),
                chunk.docstring.as_deref(),
                &preview,
            );

            // Compute blob_sha for this chunk's content
            let chunk_content = &preview;
            let blob_sha = super::hash::FileHasher::hash_bytes(chunk_content.as_bytes())
                .to_hex()
                .to_string();

            let chunk_record = crate::db::ChunkRecord {
                file_id,
                blob_sha,
                symbol_name: chunk.symbol_name.clone(),
                kind: chunk.kind.clone(),
                signature: chunk.signature.clone(),
                docstring: chunk.docstring.clone(),
                start_line: chunk.start_line,
                end_line: chunk.end_line,
                preview,
                ts_doc_text,
                recency_score: 1.0, // Modified = high recency
                churn_score: 0.5,   // Modified = some churn
                metadata: chunk.metadata.clone(),
                worktree_id: self.worktree_id,
            };

            let chunk_id = self
                .store
                .insert_chunk(&chunk_record)
                .await
                .with_context(|| format!("Failed to insert chunk for file: {}", path.display()))?;
            chunk_ids.push(chunk_id);
        }

        // 5. Update edges for new chunks
        // Note: EdgeUpdater.update_edges takes file_id, not chunk_ids
        self.edge_updater
            .update_edges(file_id)
            .await
            .with_context(|| format!("Failed to update edges for file: {}", path.display()))?;

        debug!(
            path = %path.display(),
            file_id = file_id,
            chunks = chunk_ids.len(),
            "Updated file"
        );

        Ok(())
    }

    /// Remove a deleted file and all its chunks.
    ///
    /// # Transaction Flow
    /// 1. Look up the file record by relative path (no-op if absent)
    /// 2. Delete all chunks (CASCADE deletes edges automatically)
    /// 3. Delete the file record
    ///
    /// Transactional wrapping is pending the SQLite port; see the note in the body.
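    ///
    /// Edge cleanup relies on foreign-key cascades. A sketch of the assumed
    /// schema (note that SQLite only enforces cascades when
    /// `PRAGMA foreign_keys = ON` is set on the connection):
    ///
    /// ```sql
    /// CREATE TABLE chunk_edges (
    ///     src_chunk_id INTEGER NOT NULL REFERENCES chunks(id) ON DELETE CASCADE,
    ///     dst_chunk_id INTEGER NOT NULL REFERENCES chunks(id) ON DELETE CASCADE
    /// );
    /// ```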
    ///
    /// # Arguments
    /// * `path` - Absolute filesystem path to the deleted file
    ///
    /// # Returns
    /// * `Ok(())` - File removed successfully
    /// * `Err(_)` - Removal failed (transactional rollback pending the SQLite port)
    async fn remove_file(&self, path: &Path) -> Result<()> {
        // Note: This function previously used PostgreSQL transactions.
        // SQLite implementation will be added in future tickets.

        // CRITICAL: Normalize path for database query (database stores relative paths)
        // Absolute path example: "/workspace/packages/cli/src/main.ts"
        // Relative path example: "packages/cli/src/main.ts"
        let relpath = normalize_to_relpath(path, &self.repo_root)
            .with_context(|| format!("Failed to normalize path: {}", path.display()))?;

        let relpath_str = relpath
            .to_str()
            .ok_or_else(|| anyhow::anyhow!("Invalid UTF-8 in path: {}", relpath.display()))?;

        // 1. Look up file by relpath
        let file_id = self
            .store
            .get_file_id_by_relpath(relpath_str, self.worktree_id)
            .await
            .with_context(|| format!("Failed to look up file: {}", path.display()))?;

        let file_id = match file_id {
            Some(id) => id,
            None => {
                // File doesn't exist in DB - nothing to delete
                debug!(path = %path.display(), "File not found in DB, nothing to delete");
                return Ok(());
            }
        };

        // 2. Delete chunks (this also cleans up edges and embeddings via CASCADE)
        let chunks_deleted = self
            .store
            .delete_chunks_by_file(file_id)
            .await
            .with_context(|| format!("Failed to delete chunks for file: {}", path.display()))?;

        // 3. Delete file record
        self.store
            .delete_file(file_id)
            .await
            .with_context(|| format!("Failed to delete file record: {}", path.display()))?;

        debug!(
            path = %path.display(),
            file_id = file_id,
            chunks_deleted = chunks_deleted,
            "Removed file"
        );

        Ok(())
    }
}

/// Detect programming language from file path extension.
///
/// # Arguments
/// * `path` - File path to analyze
///
/// # Returns
/// Language identifier (e.g., "ts", "rs", "md") or None if unknown
fn detect_language_from_path(path: &Path) -> Option<&'static str> {
    match path.extension().and_then(|e| e.to_str()).unwrap_or("") {
        "ts" => Some("ts"),
        "tsx" => Some("tsx"),
        "js" => Some("js"),
        "jsx" => Some("jsx"),
        "rs" => Some("rs"),
        "md" => Some("md"),
        "mdx" => Some("mdx"),
        "json" => Some("json"),
        "yaml" | "yml" => Some("yaml"),
        "toml" => Some("toml"),
        _ => None,
    }
}

/// Parse a file's content to extract symbol chunks.
///
/// Uses the existing parser infrastructure from `crate::indexer::parser`.
///
/// # Arguments
/// * `content` - File content as string
/// * `language` - Language identifier
///
/// # Returns
/// Vector of symbol chunks extracted from the file
fn parse_file_chunks(content: &str, language: &str) -> Result<Vec<SymbolChunk>> {
    use crate::indexer::parser;

    let chunks = parser::extract_chunks(content, language);

    // If no chunks extracted, create a single module-level chunk
    if chunks.is_empty() {
        Ok(vec![SymbolChunk {
            symbol_name: None,
            kind: "module".to_string(),
            signature: None,
            docstring: None,
            start_line: 1,
            // Guard: empty content would yield 0, putting end_line before start_line
            end_line: (content.lines().count() as i32).max(1),
            metadata: None,
        }])
    } else {
        Ok(chunks)
    }
}

// Note: insert_chunk_in_transaction was removed as part of PostgreSQL to SQLite migration.
// Chunk insertion will be implemented using SqliteStore methods in future tickets.

/// Build full-text search document from chunk metadata.
///
/// Combines symbol name, signature, docstring, and preview into a single
/// searchable document.
///
/// # Arguments
/// * `symbol_name` - Optional symbol name
/// * `signature` - Optional function/class signature
/// * `docstring` - Optional documentation string
/// * `preview` - Code preview text
///
/// # Returns
/// Combined text document for full-text search
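///
/// # Example
///
/// Parts are joined with a `" \n "` separator:
///
/// ```ignore
/// let doc = build_ts_doc(Some("parse"), None, None, "let x = 1;");
/// assert_eq!(doc, "parse \n let x = 1;");
/// ```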
fn build_ts_doc(
    symbol_name: Option<&str>,
    signature: Option<&str>,
    docstring: Option<&str>,
    preview: &str,
) -> String {
    let mut parts: Vec<String> = Vec::new();

    if let Some(s) = symbol_name {
        parts.push(s.to_owned());
    }
    if let Some(s) = signature {
        parts.push(s.to_owned());
    }
    if let Some(s) = docstring {
        parts.push(s.to_owned());
    }
    parts.push(preview.to_owned());

    parts.join(" \n ")
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_detect_language_from_path() {
        assert_eq!(
            detect_language_from_path(Path::new("src/main.rs")),
            Some("rs")
        );
        assert_eq!(
            detect_language_from_path(Path::new("src/lib.ts")),
            Some("ts")
        );
        assert_eq!(
            detect_language_from_path(Path::new("README.md")),
            Some("md")
        );
        assert_eq!(
            detect_language_from_path(Path::new("config.yaml")),
            Some("yaml")
        );
        assert_eq!(detect_language_from_path(Path::new("unknown.xyz")), None);
    }

    #[test]
    fn test_build_ts_doc() {
        let doc = build_ts_doc(
            Some("myFunction"),
            Some("fn myFunction(x: i32) -> i32"),
            Some("Does something cool"),
            "let x = 42;",
        );

        assert!(doc.contains("myFunction"));
        assert!(doc.contains("fn myFunction"));
        assert!(doc.contains("Does something cool"));
        assert!(doc.contains("let x = 42;"));
    }

    #[test]
    fn test_build_ts_doc_minimal() {
        let doc = build_ts_doc(None, None, None, "some code");
        assert_eq!(doc, "some code");
    }

    #[test]
    fn test_parse_file_chunks_creates_module_for_empty() {
        let chunks = parse_file_chunks("", "unknown").unwrap();
        assert_eq!(chunks.len(), 1);
        assert_eq!(chunks[0].kind, "module");
    }
}