Skip to main content

rumdl_lib/lsp/
index_worker.rs

1//! Background worker for workspace index management
2//!
3//! This module provides a background task that manages the workspace index
4//! for cross-file analysis. It handles debouncing rapid file updates and
5//! efficiently updates the index without blocking the main LSP server.
6
7use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use tokio::sync::{RwLock, mpsc};
13use tower_lsp::Client;
14use tower_lsp::lsp_types::*;
15
16use crate::config::MarkdownFlavor;
17use crate::lint_context::LintContext;
18use crate::lsp::types::{IndexState, IndexUpdate};
19use crate::utils::anchor_styles::AnchorStyle;
20use crate::workspace_index::{FileIndex, HeadingIndex, WorkspaceIndex, extract_cross_file_links};
21
/// File extensions (lowercase, without the leading dot) that are treated as markdown.
///
/// Every entry is plain ASCII, which is what allows [`is_markdown_extension`]
/// to use an ASCII-only case-insensitive comparison.
const MARKDOWN_EXTENSIONS: &[&str] = &["md", "markdown", "mdx", "mkd", "mkdn", "mdown", "mdwn", "qmd", "rmd"];

/// Returns `true` when `ext` is one of [`MARKDOWN_EXTENSIONS`], ignoring ASCII case.
///
/// `ext` is the extension as returned by `Path::extension` (no leading dot).
/// Extensions that are not valid UTF-8 are never considered markdown.
#[inline]
fn is_markdown_extension(ext: &std::ffi::OsStr) -> bool {
    // Compare in place instead of allocating a lowercased copy for every
    // file visited during a workspace scan.
    ext.to_str()
        .is_some_and(|s| MARKDOWN_EXTENSIONS.iter().any(|known| known.eq_ignore_ascii_case(s)))
}
31
/// Background worker for managing the workspace index
///
/// Receives [`IndexUpdate`] messages via a channel and maintains the shared
/// workspace index, debouncing `FileChanged` messages so that rapid edits to
/// the same file are indexed once rather than on every change.
pub struct IndexWorker {
    /// Receiver for index update messages; the event loop ends when this
    /// channel is closed or a `Shutdown` message arrives
    rx: mpsc::Receiver<IndexUpdate>,
    /// The workspace index being maintained (shared behind an async `RwLock`)
    workspace_index: Arc<RwLock<WorkspaceIndex>>,
    /// Current state of the index (`Building` with progress, or `Ready`),
    /// written during a full rescan
    index_state: Arc<RwLock<IndexState>>,
    /// LSP client used to send work-done progress requests/notifications
    client: Client,
    /// Workspace root folders that a full rescan walks for markdown files
    workspace_roots: Arc<RwLock<Vec<PathBuf>>>,
    /// Debouncing: path -> (latest content, time that content was queued).
    /// A newer `FileChanged` for the same path replaces the entry.
    pending: HashMap<PathBuf, (String, Instant)>,
    /// How long an entry must sit in `pending` before it is indexed
    debounce_duration: Duration,
    /// Sender to request re-linting of files (back to server); receives the
    /// paths of files that depend on a changed or deleted file
    relint_tx: mpsc::Sender<PathBuf>,
}
54
55impl IndexWorker {
56    /// Create a new index worker
57    pub fn new(
58        rx: mpsc::Receiver<IndexUpdate>,
59        workspace_index: Arc<RwLock<WorkspaceIndex>>,
60        index_state: Arc<RwLock<IndexState>>,
61        client: Client,
62        workspace_roots: Arc<RwLock<Vec<PathBuf>>>,
63        relint_tx: mpsc::Sender<PathBuf>,
64    ) -> Self {
65        Self {
66            rx,
67            workspace_index,
68            index_state,
69            client,
70            workspace_roots,
71            pending: HashMap::new(),
72            debounce_duration: Duration::from_millis(100),
73            relint_tx,
74        }
75    }
76
77    /// Run the index worker event loop
78    pub async fn run(mut self) {
79        let mut debounce_interval = tokio::time::interval(Duration::from_millis(50));
80
81        loop {
82            tokio::select! {
83                // Receive updates from main server
84                msg = self.rx.recv() => {
85                    match msg {
86                        Some(IndexUpdate::FileChanged { path, content }) => {
87                            self.pending.insert(path, (content, Instant::now()));
88                        }
89                        Some(IndexUpdate::FileDeleted { path }) => {
90                            self.handle_file_deleted(&path).await;
91                        }
92                        Some(IndexUpdate::FullRescan) => {
93                            self.full_rescan().await;
94                        }
95                        Some(IndexUpdate::Shutdown) | None => {
96                            log::info!("Index worker shutting down");
97                            break;
98                        }
99                    }
100                }
101
102                // Process debounced updates periodically
103                _ = debounce_interval.tick() => {
104                    self.process_pending_updates().await;
105                }
106            }
107        }
108    }
109
110    /// Process pending updates that have been debounced long enough
111    async fn process_pending_updates(&mut self) {
112        let now = Instant::now();
113        let ready: Vec<_> = self
114            .pending
115            .iter()
116            .filter(|(_, (_, time))| now.duration_since(*time) >= self.debounce_duration)
117            .map(|(path, _)| path.clone())
118            .collect();
119
120        for path in ready {
121            if let Some((content, _)) = self.pending.remove(&path) {
122                self.update_single_file(&path, &content).await;
123            }
124        }
125    }
126
127    /// Update a single file in the index
128    async fn update_single_file(&self, path: &Path, content: &str) {
129        // Build FileIndex using LintContext
130        let file_index =
131            match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| Self::build_file_index(content))) {
132                Ok(index) => index,
133                Err(_) => {
134                    log::error!("Panic while indexing {}: skipping", path.display());
135                    return;
136                }
137            };
138
139        // Get old dependents before updating
140        let old_dependents = {
141            let index = self.workspace_index.read().await;
142            index.get_dependents(path)
143        };
144
145        // Update the index
146        {
147            let mut index = self.workspace_index.write().await;
148            index.update_file(path, file_index);
149        }
150
151        // Get new dependents after updating
152        let new_dependents = {
153            let index = self.workspace_index.read().await;
154            index.get_dependents(path)
155        };
156
157        // Request re-lint of affected files (union of old and new dependents)
158        let mut affected: std::collections::HashSet<PathBuf> = old_dependents.into_iter().collect();
159        affected.extend(new_dependents);
160
161        for dep_path in affected {
162            if self.relint_tx.send(dep_path.clone()).await.is_err() {
163                log::warn!("Failed to send re-lint request for {}", dep_path.display());
164            }
165        }
166    }
167
168    /// Build a FileIndex from content
169    fn build_file_index(content: &str) -> FileIndex {
170        let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
171        let mut file_index = FileIndex::new();
172
173        // Extract headings from the content
174        for (line_num, line_info) in ctx.lines.iter().enumerate() {
175            if let Some(heading) = &line_info.heading {
176                let auto_anchor = AnchorStyle::GitHub.generate_fragment(&heading.text);
177
178                file_index.add_heading(HeadingIndex {
179                    text: heading.text.clone(),
180                    auto_anchor,
181                    custom_anchor: heading.custom_id.clone(),
182                    line: line_num + 1, // 1-indexed
183                });
184            }
185        }
186
187        // Extract cross-file links using the shared utility
188        // This ensures consistent position tracking with MD057
189        for link in extract_cross_file_links(&ctx) {
190            file_index.add_cross_file_link(link);
191        }
192
193        file_index
194    }
195
196    /// Handle a file deletion
197    async fn handle_file_deleted(&self, path: &Path) {
198        // Remove pending update for this file
199        // (self.pending is not accessible here directly, but FileDeleted is handled immediately)
200
201        // Get dependents before removing
202        let dependents = {
203            let index = self.workspace_index.read().await;
204            index.get_dependents(path)
205        };
206
207        // Remove from index
208        {
209            let mut index = self.workspace_index.write().await;
210            index.remove_file(path);
211        }
212
213        // Request re-lint of dependent files (they now have broken links)
214        for dep_path in dependents {
215            if self.relint_tx.send(dep_path.clone()).await.is_err() {
216                log::warn!("Failed to send re-lint request for {}", dep_path.display());
217            }
218        }
219    }
220
221    /// Perform a full rescan of the workspace
222    async fn full_rescan(&mut self) {
223        // Clear pending updates
224        self.pending.clear();
225
226        // Find all markdown files in workspace roots
227        let roots = self.workspace_roots.read().await.clone();
228        let files = scan_markdown_files(&roots).await;
229        let total = files.len();
230
231        if total == 0 {
232            *self.index_state.write().await = IndexState::Ready;
233            return;
234        }
235
236        // Set initial building state
237        *self.index_state.write().await = IndexState::Building {
238            progress: 0.0,
239            files_indexed: 0,
240            total_files: total,
241        };
242
243        // Report progress start
244        self.report_progress_begin(total).await;
245
246        // Index each file
247        for (i, path) in files.iter().enumerate() {
248            if let Ok(content) = tokio::fs::read_to_string(path).await {
249                let file_index = Self::build_file_index(&content);
250
251                let mut index = self.workspace_index.write().await;
252                index.update_file(path, file_index);
253            }
254
255            // Report progress every 10 files or at end
256            if i % 10 == 0 || i == total - 1 {
257                let progress = ((i + 1) as f32 / total as f32) * 100.0;
258                *self.index_state.write().await = IndexState::Building {
259                    progress,
260                    files_indexed: i + 1,
261                    total_files: total,
262                };
263                self.report_progress_update(i + 1, total).await;
264            }
265        }
266
267        // Mark as ready
268        *self.index_state.write().await = IndexState::Ready;
269        self.report_progress_done().await;
270
271        log::info!("Workspace indexing complete: {total} files indexed");
272    }
273
274    /// Report progress begin via LSP
275    async fn report_progress_begin(&self, total: usize) {
276        let token = NumberOrString::String("rumdl-index".to_string());
277
278        // Request progress token creation
279        if self
280            .client
281            .send_request::<request::WorkDoneProgressCreate>(WorkDoneProgressCreateParams { token: token.clone() })
282            .await
283            .is_err()
284        {
285            log::debug!("Client does not support work done progress");
286            return;
287        }
288
289        // Send begin notification
290        self.client
291            .send_notification::<notification::Progress>(ProgressParams {
292                token,
293                value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin(WorkDoneProgressBegin {
294                    title: "Indexing workspace".to_string(),
295                    cancellable: Some(false),
296                    message: Some(format!("Scanning {total} markdown files...")),
297                    percentage: Some(0),
298                })),
299            })
300            .await;
301    }
302
303    /// Report progress update via LSP
304    async fn report_progress_update(&self, indexed: usize, total: usize) {
305        let token = NumberOrString::String("rumdl-index".to_string());
306        let percentage = ((indexed as f32 / total as f32) * 100.0) as u32;
307
308        self.client
309            .send_notification::<notification::Progress>(ProgressParams {
310                token,
311                value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report(WorkDoneProgressReport {
312                    cancellable: Some(false),
313                    message: Some(format!("Indexed {indexed}/{total} files")),
314                    percentage: Some(percentage),
315                })),
316            })
317            .await;
318    }
319
320    /// Report progress done via LSP
321    async fn report_progress_done(&self) {
322        let token = NumberOrString::String("rumdl-index".to_string());
323
324        self.client
325            .send_notification::<notification::Progress>(ProgressParams {
326                token,
327                value: ProgressParamsValue::WorkDone(WorkDoneProgress::End(WorkDoneProgressEnd {
328                    message: Some("Indexing complete".to_string()),
329                })),
330            })
331            .await;
332    }
333}
334
335/// Scan workspace roots for markdown files
336async fn scan_markdown_files(roots: &[PathBuf]) -> Vec<PathBuf> {
337    let mut files = Vec::new();
338
339    for root in roots {
340        if let Err(e) = collect_markdown_files_recursive(root, &mut files).await {
341            log::warn!("Error scanning {}: {}", root.display(), e);
342        }
343    }
344
345    files
346}
347
348/// Recursively collect markdown files from a directory
349async fn collect_markdown_files_recursive(dir: &PathBuf, files: &mut Vec<PathBuf>) -> std::io::Result<()> {
350    let mut entries = tokio::fs::read_dir(dir).await?;
351
352    while let Some(entry) = entries.next_entry().await? {
353        let path = entry.path();
354        let file_type = entry.file_type().await?;
355
356        if file_type.is_dir() {
357            // Skip hidden directories and common non-source directories
358            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
359            if !name.starts_with('.') && name != "node_modules" && name != "target" {
360                Box::pin(collect_markdown_files_recursive(&path, files)).await?;
361            }
362        } else if file_type.is_file()
363            && let Some(ext) = path.extension()
364            && is_markdown_extension(ext)
365        {
366            files.push(path);
367        }
368    }
369
370    Ok(())
371}
372
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings (with and without a custom anchor) and a single cross-file
    /// link are all picked up from one document.
    #[test]
    fn test_build_file_index() {
        let content = r#"
# Main Heading

Some text.

## Sub Heading {#sub}

More text with [link](./other.md#section).
"#;

        let index = IndexWorker::build_file_index(content);
        let headings = &index.headings;
        let links = &index.cross_file_links;

        assert_eq!(headings.len(), 2);

        let main = &headings[0];
        assert_eq!(main.text, "Main Heading");
        assert!(main.custom_anchor.is_none());

        // The heading text has the custom ID stripped; the ID itself is kept
        // separately as the custom anchor.
        let sub = &headings[1];
        assert_eq!(sub.text, "Sub Heading");
        assert_eq!(sub.custom_anchor.as_deref(), Some("sub"));

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target_path, "./other.md");
        assert_eq!(link.fragment, "section");
    }

    /// Column positions point at the link target (regression test for
    /// issue #234).
    #[test]
    fn test_build_file_index_column_positions() {
        let content = "See [link](./file.md) here.\n";

        let index = IndexWorker::build_file_index(content);
        let links = &index.cross_file_links;

        assert_eq!(links.len(), 1);
        let link = &links[0];
        assert_eq!(link.target_path, "./file.md");
        assert_eq!(link.line, 1);
        // `See [link](` is 11 characters long, so `./file.md` begins at column 12.
        assert_eq!(link.column, 12);
    }

    /// Two links on the same line are both reported, each with its own column.
    #[test]
    fn test_build_file_index_multiple_links() {
        let content = "First [a](./a.md) and [b](./b.md#section) links.\n";

        let index = IndexWorker::build_file_index(content);
        let links = &index.cross_file_links;

        assert_eq!(links.len(), 2);

        // `First [a](` is 10 characters long, so the first target begins at column 11.
        let first = &links[0];
        assert_eq!(first.target_path, "./a.md");
        assert_eq!(first.column, 11);

        // `First [a](./a.md) and [b](` is 26 characters long, so the second
        // target begins at column 27.
        let second = &links[1];
        assert_eq!(second.target_path, "./b.md");
        assert_eq!(second.fragment, "section");
        assert_eq!(second.column, 27);
    }
}
435}