1use std::collections::HashMap;
8use std::path::{Path, PathBuf};
9use std::sync::Arc;
10use std::time::{Duration, Instant};
11
12use tokio::sync::{RwLock, mpsc};
13use tower_lsp::Client;
14use tower_lsp::lsp_types::*;
15
16use crate::config::MarkdownFlavor;
17use crate::lint_context::LintContext;
18use crate::lsp::types::{IndexState, IndexUpdate};
19use crate::utils::anchor_styles::AnchorStyle;
20use crate::workspace_index::{FileIndex, HeadingIndex, WorkspaceIndex, extract_cross_file_links};
21
/// File extensions (lowercase, without the leading dot) that are treated as Markdown.
const MARKDOWN_EXTENSIONS: &[&str] = &["md", "markdown", "mdx", "mkd", "mkdn", "mdown", "mdwn", "qmd", "rmd"];

/// Returns `true` when `ext` is one of [`MARKDOWN_EXTENSIONS`].
///
/// The comparison is ASCII case-insensitive, so `MD` and `Markdown` match.
/// Every known extension is plain ASCII, which lets the check run without
/// allocating a lowercased copy of `ext`. Extensions that are not valid
/// UTF-8 never match.
#[inline]
fn is_markdown_extension(ext: &std::ffi::OsStr) -> bool {
    ext.to_str().is_some_and(|candidate| {
        MARKDOWN_EXTENSIONS
            .iter()
            .any(|known| known.eq_ignore_ascii_case(candidate))
    })
}
31
/// Background worker that keeps the shared workspace index up to date.
///
/// It consumes [`IndexUpdate`] messages, debounces per-file content changes,
/// writes the resulting file indexes into the shared [`WorkspaceIndex`], and
/// sends the paths of dependent files on `relint_tx`.
pub struct IndexWorker {
    /// Incoming index update messages.
    rx: mpsc::Receiver<IndexUpdate>,
    /// Shared workspace index that this worker writes file indexes into.
    workspace_index: Arc<RwLock<WorkspaceIndex>>,
    /// Shared indexing state (`Building` with progress counters, or `Ready`).
    index_state: Arc<RwLock<IndexState>>,
    /// LSP client handle used for work-done progress requests and notifications.
    client: Client,
    /// Workspace root directories that a full rescan walks.
    workspace_roots: Arc<RwLock<Vec<PathBuf>>>,
    /// Changed files awaiting indexing: path -> (latest content, time the change was received).
    pending: HashMap<PathBuf, (String, Instant)>,
    /// Minimum age a pending change must reach before it is indexed.
    debounce_duration: Duration,
    /// Channel on which paths that need a re-lint are sent.
    relint_tx: mpsc::Sender<PathBuf>,
}
54
55impl IndexWorker {
56 pub fn new(
58 rx: mpsc::Receiver<IndexUpdate>,
59 workspace_index: Arc<RwLock<WorkspaceIndex>>,
60 index_state: Arc<RwLock<IndexState>>,
61 client: Client,
62 workspace_roots: Arc<RwLock<Vec<PathBuf>>>,
63 relint_tx: mpsc::Sender<PathBuf>,
64 ) -> Self {
65 Self {
66 rx,
67 workspace_index,
68 index_state,
69 client,
70 workspace_roots,
71 pending: HashMap::new(),
72 debounce_duration: Duration::from_millis(100),
73 relint_tx,
74 }
75 }
76
77 pub async fn run(mut self) {
79 let mut debounce_interval = tokio::time::interval(Duration::from_millis(50));
80
81 loop {
82 tokio::select! {
83 msg = self.rx.recv() => {
85 match msg {
86 Some(IndexUpdate::FileChanged { path, content }) => {
87 self.pending.insert(path, (content, Instant::now()));
88 }
89 Some(IndexUpdate::FileDeleted { path }) => {
90 self.handle_file_deleted(&path).await;
91 }
92 Some(IndexUpdate::FullRescan) => {
93 self.full_rescan().await;
94 }
95 Some(IndexUpdate::Shutdown) | None => {
96 log::info!("Index worker shutting down");
97 break;
98 }
99 }
100 }
101
102 _ = debounce_interval.tick() => {
104 self.process_pending_updates().await;
105 }
106 }
107 }
108 }
109
110 async fn process_pending_updates(&mut self) {
112 let now = Instant::now();
113 let ready: Vec<_> = self
114 .pending
115 .iter()
116 .filter(|(_, (_, time))| now.duration_since(*time) >= self.debounce_duration)
117 .map(|(path, _)| path.clone())
118 .collect();
119
120 for path in ready {
121 if let Some((content, _)) = self.pending.remove(&path) {
122 self.update_single_file(&path, &content).await;
123 }
124 }
125 }
126
127 async fn update_single_file(&self, path: &Path, content: &str) {
129 let Ok(file_index) = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| Self::build_file_index(content)))
131 else {
132 log::error!("Panic while indexing {}: skipping", path.display());
133 return;
134 };
135
136 let old_dependents = {
138 let index = self.workspace_index.read().await;
139 index.get_dependents(path)
140 };
141
142 {
144 let mut index = self.workspace_index.write().await;
145 index.update_file(path, file_index);
146 }
147
148 let new_dependents = {
150 let index = self.workspace_index.read().await;
151 index.get_dependents(path)
152 };
153
154 let mut affected: std::collections::HashSet<PathBuf> = old_dependents.into_iter().collect();
156 affected.extend(new_dependents);
157
158 for dep_path in affected {
159 if self.relint_tx.send(dep_path.clone()).await.is_err() {
160 log::warn!("Failed to send re-lint request for {}", dep_path.display());
161 }
162 }
163 }
164
165 pub(super) fn build_file_index(content: &str) -> FileIndex {
167 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
168 let mut file_index = FileIndex::new();
169
170 for (line_num, line_info) in ctx.lines.iter().enumerate() {
172 if let Some(heading) = &line_info.heading {
173 let auto_anchor = AnchorStyle::GitHub.generate_fragment(&heading.text);
174 let is_setext = matches!(
175 heading.style,
176 crate::lint_context::types::HeadingStyle::Setext1
177 | crate::lint_context::types::HeadingStyle::Setext2
178 );
179
180 file_index.add_heading(HeadingIndex {
181 text: heading.text.clone(),
182 auto_anchor,
183 custom_anchor: heading.custom_id.clone(),
184 line: line_num + 1, is_setext,
186 });
187 }
188 }
189
190 let links = extract_cross_file_links(&ctx);
193 for link in links.relative {
194 file_index.add_cross_file_link(link);
195 }
196 for link in links.root_relative {
197 file_index.add_root_relative_link(link);
198 }
199
200 file_index
201 }
202
203 async fn handle_file_deleted(&self, path: &Path) {
205 let dependents = {
210 let index = self.workspace_index.read().await;
211 index.get_dependents(path)
212 };
213
214 {
216 let mut index = self.workspace_index.write().await;
217 index.remove_file(path);
218 }
219
220 for dep_path in dependents {
222 if self.relint_tx.send(dep_path.clone()).await.is_err() {
223 log::warn!("Failed to send re-lint request for {}", dep_path.display());
224 }
225 }
226 }
227
228 async fn full_rescan(&mut self) {
230 self.pending.clear();
232
233 let roots = self.workspace_roots.read().await.clone();
235 let files = scan_markdown_files(&roots).await;
236 let total = files.len();
237
238 if total == 0 {
239 *self.index_state.write().await = IndexState::Ready;
240 return;
241 }
242
243 *self.index_state.write().await = IndexState::Building {
245 progress: 0.0,
246 files_indexed: 0,
247 total_files: total,
248 };
249
250 self.report_progress_begin(total).await;
252
253 for (i, path) in files.iter().enumerate() {
255 if let Ok(content) = tokio::fs::read_to_string(path).await {
256 let file_index = Self::build_file_index(&content);
257
258 let mut index = self.workspace_index.write().await;
259 index.update_file(path, file_index);
260 }
261
262 if i % 10 == 0 || i == total - 1 {
264 let progress = ((i + 1) as f32 / total as f32) * 100.0;
265 *self.index_state.write().await = IndexState::Building {
266 progress,
267 files_indexed: i + 1,
268 total_files: total,
269 };
270 self.report_progress_update(i + 1, total).await;
271 }
272 }
273
274 *self.index_state.write().await = IndexState::Ready;
276 self.report_progress_done().await;
277
278 log::info!("Workspace indexing complete: {total} files indexed");
279 }
280
281 async fn report_progress_begin(&self, total: usize) {
283 let token = NumberOrString::String("rumdl-index".to_string());
284
285 if self
287 .client
288 .send_request::<request::WorkDoneProgressCreate>(WorkDoneProgressCreateParams { token: token.clone() })
289 .await
290 .is_err()
291 {
292 log::debug!("Client does not support work done progress");
293 return;
294 }
295
296 self.client
298 .send_notification::<notification::Progress>(ProgressParams {
299 token,
300 value: ProgressParamsValue::WorkDone(WorkDoneProgress::Begin(WorkDoneProgressBegin {
301 title: "Indexing workspace".to_string(),
302 cancellable: Some(false),
303 message: Some(format!("Scanning {total} markdown files...")),
304 percentage: Some(0),
305 })),
306 })
307 .await;
308 }
309
310 async fn report_progress_update(&self, indexed: usize, total: usize) {
312 let token = NumberOrString::String("rumdl-index".to_string());
313 let percentage = ((indexed as f32 / total as f32) * 100.0) as u32;
314
315 self.client
316 .send_notification::<notification::Progress>(ProgressParams {
317 token,
318 value: ProgressParamsValue::WorkDone(WorkDoneProgress::Report(WorkDoneProgressReport {
319 cancellable: Some(false),
320 message: Some(format!("Indexed {indexed}/{total} files")),
321 percentage: Some(percentage),
322 })),
323 })
324 .await;
325 }
326
327 async fn report_progress_done(&self) {
329 let token = NumberOrString::String("rumdl-index".to_string());
330
331 self.client
332 .send_notification::<notification::Progress>(ProgressParams {
333 token,
334 value: ProgressParamsValue::WorkDone(WorkDoneProgress::End(WorkDoneProgressEnd {
335 message: Some("Indexing complete".to_string()),
336 })),
337 })
338 .await;
339 }
340}
341
342async fn scan_markdown_files(roots: &[PathBuf]) -> Vec<PathBuf> {
348 let roots = roots.to_vec();
349 tokio::task::spawn_blocking(move || collect_markdown_files(&roots))
350 .await
351 .unwrap_or_else(|e| {
352 log::warn!("Workspace scan task failed: {e}");
353 Vec::new()
354 })
355}
356
357fn index_walk_builder(root: &Path) -> ignore::WalkBuilder {
365 let mut builder = ignore::WalkBuilder::new(root);
366 builder
367 .hidden(true)
368 .git_ignore(true)
369 .git_global(true)
370 .git_exclude(true)
371 .parents(true)
372 .require_git(false)
374 .filter_entry(|entry| {
376 let name = entry.file_name().to_str().unwrap_or("");
377 name != "node_modules" && name != "target"
378 });
379 builder
380}
381
382fn collect_markdown_files(roots: &[PathBuf]) -> Vec<PathBuf> {
384 let mut files = Vec::new();
385
386 for root in roots {
387 for result in index_walk_builder(root).build() {
388 match result {
389 Ok(entry) => {
390 let path = entry.path();
391 if entry.file_type().is_some_and(|t| t.is_file())
392 && let Some(ext) = path.extension()
393 && is_markdown_extension(ext)
394 {
395 files.push(path.to_path_buf());
396 }
397 }
398 Err(e) => log::warn!("Error scanning {}: {}", root.display(), e),
399 }
400 }
401 }
402
403 files
404}
405
406pub(super) fn path_is_ignored_for_index(roots: &[PathBuf], path: &Path) -> bool {
425 let Some(root) = roots
428 .iter()
429 .filter(|r| path.starts_with(r))
430 .max_by_key(|r| r.components().count())
431 else {
432 return false;
433 };
434
435 if let Ok(rel) = path.strip_prefix(root)
438 && rel
439 .components()
440 .any(|c| matches!(c, std::path::Component::Normal(name) if name == "node_modules" || name == "target"))
441 {
442 return true;
443 }
444
445 let target = path.to_path_buf();
446 let mut builder = index_walk_builder(root);
447 builder.filter_entry(move |entry| target.starts_with(entry.path()));
450 for entry in builder.build().flatten() {
451 if entry.path() == path {
452 return false;
453 }
454 }
455 true
456}
457
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Returns the final path component of every entry as an owned string.
    fn file_names(paths: &[PathBuf]) -> Vec<String> {
        paths
            .iter()
            .map(|p| p.file_name().unwrap().to_str().unwrap().to_string())
            .collect()
    }

    /// Creates `dirs`, then writes each `(path, contents)` pair, all relative to `root`.
    fn populate(root: &Path, dirs: &[&str], files: &[(&str, &str)]) {
        for &dir in dirs {
            fs::create_dir(root.join(dir)).unwrap();
        }
        for &(rel, contents) in files {
            fs::write(root.join(rel), contents).unwrap();
        }
    }

    /// Headings (with and without a custom id) and a cross-file link are indexed.
    #[test]
    fn test_build_file_index() {
        let content = r#"
# Main Heading

Some text.

## Sub Heading {#sub}

More text with [link](./other.md#section).
"#;

        let index = IndexWorker::build_file_index(content);

        assert_eq!(index.headings.len(), 2);
        let main = &index.headings[0];
        assert_eq!(main.text, "Main Heading");
        assert!(main.custom_anchor.is_none());

        let sub = &index.headings[1];
        assert_eq!(sub.text, "Sub Heading");
        assert_eq!(sub.custom_anchor, Some("sub".to_string()));

        assert_eq!(index.cross_file_links.len(), 1);
        let link = &index.cross_file_links[0];
        assert_eq!(link.target_path, "./other.md");
        assert_eq!(link.fragment, "section");
    }

    /// A link's recorded line and column match its position in the source.
    #[test]
    fn test_build_file_index_column_positions() {
        let content = "See [link](./file.md) here.\n";

        let index = IndexWorker::build_file_index(content);

        assert_eq!(index.cross_file_links.len(), 1);
        let link = &index.cross_file_links[0];
        assert_eq!(link.target_path, "./file.md");
        assert_eq!(link.line, 1);
        assert_eq!(link.column, 12);
    }

    /// Two links on the same line are indexed in source order with their own columns.
    #[test]
    fn test_build_file_index_multiple_links() {
        let content = "First [a](./a.md) and [b](./b.md#section) links.\n";

        let index = IndexWorker::build_file_index(content);

        assert_eq!(index.cross_file_links.len(), 2);

        let first = &index.cross_file_links[0];
        assert_eq!(first.target_path, "./a.md");
        assert_eq!(first.column, 11);

        let second = &index.cross_file_links[1];
        assert_eq!(second.target_path, "./b.md");
        assert_eq!(second.fragment, "section");
        assert_eq!(second.column, 27);
    }

    /// Gitignored files and directories and `node_modules` are left out of the scan.
    #[test]
    fn test_collect_markdown_files_respects_gitignore() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        populate(
            root,
            &["build", "node_modules"],
            &[
                ("README.md", "# Readme\n"),
                (".gitignore", "build/\nignored.md\n"),
                ("ignored.md", "# Ignored\n"),
                ("build/generated.md", "# Generated\n"),
                ("node_modules/dep.md", "# Dep\n"),
            ],
        );

        let mut files = collect_markdown_files(&[root.to_path_buf()]);
        files.sort();

        assert_eq!(file_names(&files), vec!["README.md".to_string()]);
    }

    /// Markdown files in subdirectories are found; files with other extensions are not.
    #[test]
    fn test_collect_markdown_files_finds_nested_markdown() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path();

        populate(
            root,
            &["docs"],
            &[
                ("top.md", "# Top\n"),
                ("docs/guide.markdown", "# Guide\n"),
                ("docs/notes.txt", "not markdown\n"),
            ],
        );

        let mut names = file_names(&collect_markdown_files(&[root.to_path_buf()]));
        names.sort();

        assert_eq!(names, vec!["guide.markdown".to_string(), "top.md".to_string()]);
    }

    /// Covers regular, gitignored, hidden, always-excluded and out-of-workspace paths.
    #[test]
    fn test_path_is_ignored_for_index() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_path_buf();

        populate(
            &root,
            &["docs", "build"],
            &[
                (".gitignore", "build/\ndraft.md\n"),
                ("README.md", ""),
                ("draft.md", ""),
                (".hidden.md", ""),
                ("docs/guide.md", ""),
                ("build/out.md", ""),
            ],
        );

        let roots = vec![root.clone()];

        // (path relative to the root, expected "ignored" verdict)
        let cases = [
            ("README.md", false),
            ("docs/guide.md", false),
            ("draft.md", true),
            ("build/out.md", true),
            (".hidden.md", true),
            ("node_modules/dep.md", true),
            ("target/doc.md", true),
        ];
        for (rel, expected) in cases {
            assert_eq!(
                path_is_ignored_for_index(&roots, &root.join(rel)),
                expected,
                "unexpected verdict for {rel}"
            );
        }

        // A path outside every workspace root is never reported as ignored.
        let outside = dir.path().parent().unwrap().join("elsewhere.md");
        assert!(!path_is_ignored_for_index(&roots, &outside));
    }

    /// A `.gitignore` inside a subdirectory is honored for files in that directory.
    #[test]
    fn test_path_is_ignored_for_index_honors_nested_gitignore() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().to_path_buf();

        populate(
            &root,
            &["docs"],
            &[
                ("docs/.gitignore", "generated.md\n"),
                ("docs/generated.md", ""),
                ("docs/manual.md", ""),
            ],
        );

        let roots = vec![root.clone()];

        assert!(path_is_ignored_for_index(&roots, &root.join("docs/generated.md")));
        assert!(!path_is_ignored_for_index(&roots, &root.join("docs/manual.md")));
    }

    /// A `target` component above the root is not held against files inside the root.
    #[test]
    fn test_path_is_ignored_for_index_workspace_under_target_dir() {
        let dir = tempfile::tempdir().unwrap();
        let root = dir.path().join("target").join("my-docs");
        fs::create_dir_all(&root).unwrap();

        populate(&root, &["target"], &[("README.md", ""), ("target/out.md", "")]);

        let roots = vec![root.clone()];

        assert!(!path_is_ignored_for_index(&roots, &root.join("README.md")));
        assert!(path_is_ignored_for_index(&roots, &root.join("target/out.md")));
    }
}