1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28
/// Converts one ASCII hexadecimal digit into its numeric value (0-15).
///
/// Accepts `0-9`, `a-f` and `A-F`. Every other byte yields `None`.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // `to_digit(16)` recognises exactly the ASCII hex digits in either case and
    // always returns a value below 16, so narrowing to `u8` cannot lose data.
    char::from(c).to_digit(16).map(|digit| digit as u8)
}
42
43fn url_decode(s: &str) -> String {
47 if !s.contains('%') {
49 return s.to_string();
50 }
51
52 let bytes = s.as_bytes();
53 let mut result = Vec::with_capacity(bytes.len());
54 let mut i = 0;
55
56 while i < bytes.len() {
57 if bytes[i] == b'%' && i + 2 < bytes.len() {
58 let hex1 = bytes[i + 1];
60 let hex2 = bytes[i + 2];
61 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
62 result.push(d1 * 16 + d2);
63 i += 3;
64 continue;
65 }
66 }
67 result.push(bytes[i]);
68 i += 1;
69 }
70
71 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
73}
74
/// Matches the bracketed text of an inline link or image: an optional `!`
/// followed by `[`, any run of characters other than `]`, and a closing `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Matches a link destination written in angle brackets, starting at the `](`:
/// `](<url>#fragment "title")`.
///
/// Capture group 1 is the text between `<` and `>`. Group 2 is an optional
/// `#fragment` immediately after the closing `>`. An optional double-quoted
/// title may precede the final `)`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches a plain link destination, starting at the `](`:
/// `](url#fragment "title")`.
///
/// Capture group 1 is the URL up to the first `>`, `)`, whitespace or `#`.
/// Group 2 is an optional `#fragment`. An optional double-quoted title may
/// precede the final `)`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches a string that begins with a URL scheme (`scheme://` or `scheme:`)
/// or with `www.`.
pub(crate) static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
99
/// File-name suffixes (lowercase, including the leading dot) that
/// `is_markdown_file` treats as markdown.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
112
113#[inline]
115fn is_markdown_file(path: &str) -> bool {
116 let path_lower = path.to_lowercase();
117 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
118}
119
/// Returns `url` truncated at the first `?` or `#`, whichever comes first.
///
/// If neither character is present the input is returned unchanged.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|ch: char| ch == '?' || ch == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
133
134pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
142 let content = ctx.content;
143
144 if content.is_empty() || !content.contains("](") {
146 return Vec::new();
147 }
148
149 let mut links = Vec::new();
150 let lines: Vec<&str> = content.lines().collect();
151 let line_index = &ctx.line_index;
152
153 let mut processed_lines = HashSet::new();
156
157 for link in &ctx.links {
158 let line_idx = link.line - 1;
159 if line_idx >= lines.len() {
160 continue;
161 }
162
163 if !processed_lines.insert(line_idx) {
165 continue;
166 }
167
168 let line = lines[line_idx];
169 if !line.contains("](") {
170 continue;
171 }
172
173 for link_match in LINK_START_REGEX.find_iter(line) {
175 let start_pos = link_match.start();
176 let end_pos = link_match.end();
177
178 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
180 let absolute_start_pos = line_start_byte + start_pos;
181
182 if ctx.is_in_code_span_byte(absolute_start_pos) {
184 continue;
185 }
186
187 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
190 .captures_at(line, end_pos - 1)
191 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
192
193 if let Some(caps) = caps_result
194 && let Some(url_group) = caps.get(1)
195 {
196 let file_path = url_group.as_str().trim();
197
198 if file_path.is_empty()
201 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
202 || file_path.starts_with("www.")
203 || file_path.starts_with('#')
204 || file_path.starts_with("{{")
205 || file_path.starts_with("{%")
206 || file_path.starts_with('/')
207 || file_path.starts_with('~')
208 || file_path.starts_with('@')
209 || (file_path.starts_with('`') && file_path.ends_with('`'))
210 {
211 continue;
212 }
213
214 let file_path = strip_query_and_fragment(file_path);
216
217 let fragment = caps.get(2).map_or("", |m| m.as_str().trim_start_matches('#'));
219
220 if is_markdown_file(file_path) {
222 links.push(CrossFileLinkIndex {
223 target_path: file_path.to_string(),
224 fragment: fragment.to_string(),
225 line: link.line,
226 column: url_group.start() + 1,
227 });
228 }
229 }
230 }
231 }
232
233 links
234}
235
/// Four-byte signature written at the start of the cache file and checked
/// when loading.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Cache format version, stored as a little-endian `u32` after the magic.
/// A cache whose stored version differs is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;

/// File name of the cache inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
247
/// Index of per-file data for a workspace, plus reverse link dependencies.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Indexed data keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Maps a link target path to the set of files that link to it.
    /// Maintained by `update_file`.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Counter bumped (with wrapping) whenever the index is modified.
    version: u64,
}
262
/// Indexed data for a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings recorded via `add_heading`, in insertion order.
    pub headings: Vec<HeadingIndex>,
    /// Reference links recorded via `add_reference_link`.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links to other files recorded via `add_cross_file_link`.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference ids recorded via `add_defined_reference`.
    pub defined_references: HashSet<String>,
    /// Hash of the file content; compared to decide whether the entry is stale.
    pub content_hash: String,
    /// Lowercased anchor -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors recorded via `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors recorded via `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` means every rule.
    pub file_disabled_rules: HashSet<String>,
    /// `(line, disabled rules, enabled rules)` entries. Looked up by binary
    /// search on the line, so they must be sorted by line.
    pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
    /// Rules disabled on specific lines; `"*"` means every rule.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
296
/// One heading of a file, with the anchors it can be addressed by.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Automatically generated anchor for the heading.
    pub auto_anchor: String,
    /// Explicit custom anchor, if the heading has one.
    pub custom_anchor: Option<String>,
    /// Line number of the heading.
    pub line: usize,
    /// Whether the heading uses setext syntax; defaults to `false` when the
    /// field is absent from deserialized data.
    #[serde(default)]
    pub is_setext: bool,
}
312
/// A reference-style link usage and where it occurs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier of the reference being used.
    pub reference_id: String,
    /// Line number of the usage.
    pub line: usize,
    /// Column of the usage.
    pub column: usize,
}
323
/// A link from one file to another markdown file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Target path as written in the link, without query or fragment.
    pub target_path: String,
    /// Fragment without the leading `#`; empty when the link has none.
    pub fragment: String,
    /// Line number of the link.
    pub line: usize,
    /// Column of the link destination within its line.
    pub column: usize,
}
336
/// Location of a heading that has no custom anchor, as reported by
/// `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File containing the heading.
    pub file: PathBuf,
    /// Line number of the heading.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
347
348impl WorkspaceIndex {
349 pub fn new() -> Self {
351 Self::default()
352 }
353
354 pub fn version(&self) -> u64 {
356 self.version
357 }
358
359 pub fn file_count(&self) -> usize {
361 self.files.len()
362 }
363
364 pub fn contains_file(&self, path: &Path) -> bool {
366 self.files.contains_key(path)
367 }
368
369 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
371 self.files.get(path)
372 }
373
374 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
376 self.files.insert(path, index);
377 self.version = self.version.wrapping_add(1);
378 }
379
380 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
382 self.clear_reverse_deps_for(path);
384
385 let result = self.files.remove(path);
386 if result.is_some() {
387 self.version = self.version.wrapping_add(1);
388 }
389 result
390 }
391
392 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
402 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
403
404 for (file_path, file_index) in &self.files {
405 for heading in &file_index.headings {
406 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
408 let anchor_key = heading.auto_anchor.to_lowercase();
409 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
410 file: file_path.clone(),
411 line: heading.line,
412 text: heading.text.clone(),
413 });
414 }
415 }
416 }
417
418 vulnerable
419 }
420
421 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
423 self.files
424 .iter()
425 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
426 }
427
428 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
430 self.files.iter().map(|(p, i)| (p.as_path(), i))
431 }
432
433 pub fn clear(&mut self) {
435 self.files.clear();
436 self.reverse_deps.clear();
437 self.version = self.version.wrapping_add(1);
438 }
439
440 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
447 self.clear_reverse_deps_as_source(path);
450
451 for link in &index.cross_file_links {
453 let target = self.resolve_target_path(path, &link.target_path);
454 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
455 }
456
457 self.files.insert(path.to_path_buf(), index);
458 self.version = self.version.wrapping_add(1);
459 }
460
461 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
466 self.reverse_deps
467 .get(path)
468 .map(|set| set.iter().cloned().collect())
469 .unwrap_or_default()
470 }
471
472 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
476 self.files.get(path).is_none_or(|f| f.content_hash != current_hash)
477 }
478
479 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
484 let before_count = self.files.len();
485
486 let to_remove: Vec<PathBuf> = self
488 .files
489 .keys()
490 .filter(|path| !current_files.contains(*path))
491 .cloned()
492 .collect();
493
494 for path in &to_remove {
496 self.remove_file(path);
497 }
498
499 before_count - self.files.len()
500 }
501
502 #[cfg(feature = "native")]
509 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
510 use std::fs;
511 use std::io::Write;
512
513 fs::create_dir_all(cache_dir)?;
515
516 let encoded = postcard::to_allocvec(self)
518 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
519
520 let mut cache_data = Vec::with_capacity(8 + encoded.len());
522 cache_data.extend_from_slice(CACHE_MAGIC);
523 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
524 cache_data.extend_from_slice(&encoded);
525
526 let final_path = cache_dir.join(CACHE_FILE_NAME);
528 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
529
530 {
532 let mut file = fs::File::create(&temp_path)?;
533 file.write_all(&cache_data)?;
534 file.sync_all()?;
535 }
536
537 fs::rename(&temp_path, &final_path)?;
539
540 log::debug!(
541 "Saved workspace index to cache: {} files, {} bytes (format v{})",
542 self.files.len(),
543 cache_data.len(),
544 CACHE_FORMAT_VERSION
545 );
546
547 Ok(())
548 }
549
550 #[cfg(feature = "native")]
558 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
559 use std::fs;
560
561 let path = cache_dir.join(CACHE_FILE_NAME);
562 let data = fs::read(&path).ok()?;
563
564 if data.len() < 8 {
566 log::warn!("Workspace index cache too small, discarding");
567 let _ = fs::remove_file(&path);
568 return None;
569 }
570
571 if &data[0..4] != CACHE_MAGIC {
573 log::warn!("Workspace index cache has invalid magic header, discarding");
574 let _ = fs::remove_file(&path);
575 return None;
576 }
577
578 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
580 if version != CACHE_FORMAT_VERSION {
581 log::info!(
582 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
583 );
584 let _ = fs::remove_file(&path);
585 return None;
586 }
587
588 match postcard::from_bytes::<Self>(&data[8..]) {
590 Ok(index) => {
591 log::debug!(
592 "Loaded workspace index from cache: {} files (format v{})",
593 index.files.len(),
594 version
595 );
596 Some(index)
597 }
598 Err(e) => {
599 log::warn!("Failed to deserialize workspace index cache: {e}");
600 let _ = fs::remove_file(&path);
601 None
602 }
603 }
604 }
605
606 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
611 for deps in self.reverse_deps.values_mut() {
612 deps.remove(path);
613 }
614 self.reverse_deps.retain(|_, deps| !deps.is_empty());
616 }
617
618 fn clear_reverse_deps_for(&mut self, path: &Path) {
623 self.clear_reverse_deps_as_source(path);
625
626 self.reverse_deps.remove(path);
628 }
629
630 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
632 let source_dir = source_file.parent().unwrap_or(Path::new(""));
634
635 let target = source_dir.join(relative_target);
637
638 Self::normalize_path(&target)
640 }
641
642 fn normalize_path(path: &Path) -> PathBuf {
644 let mut components = Vec::new();
645
646 for component in path.components() {
647 match component {
648 std::path::Component::ParentDir => {
649 if !components.is_empty() {
651 components.pop();
652 }
653 }
654 std::path::Component::CurDir => {
655 }
657 _ => {
658 components.push(component);
659 }
660 }
661 }
662
663 components.iter().collect()
664 }
665}
666
667impl FileIndex {
668 pub fn new() -> Self {
670 Self::default()
671 }
672
673 pub fn with_hash(content_hash: String) -> Self {
675 Self {
676 content_hash,
677 ..Default::default()
678 }
679 }
680
681 pub fn add_heading(&mut self, heading: HeadingIndex) {
685 let index = self.headings.len();
686
687 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
689
690 if let Some(ref custom) = heading.custom_anchor {
692 self.anchor_to_heading.insert(custom.to_lowercase(), index);
693 }
694
695 self.headings.push(heading);
696 }
697
698 pub fn add_anchor_alias(&mut self, anchor: &str, heading_index: usize) {
701 if heading_index < self.headings.len() {
702 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
703 }
704 }
705
706 pub fn has_anchor(&self, anchor: &str) -> bool {
717 let lower = anchor.to_lowercase();
718
719 if self.anchor_to_heading.contains_key(&lower)
721 || self.html_anchors.contains(&lower)
722 || self.attribute_anchors.contains(&lower)
723 {
724 return true;
725 }
726
727 if anchor.contains('%') {
729 let decoded = url_decode(anchor).to_lowercase();
730 if decoded != lower {
731 return self.anchor_to_heading.contains_key(&decoded)
732 || self.html_anchors.contains(&decoded)
733 || self.attribute_anchors.contains(&decoded);
734 }
735 }
736
737 false
738 }
739
740 pub fn add_html_anchor(&mut self, anchor: &str) {
742 if !anchor.is_empty() {
743 self.html_anchors.insert(anchor.to_lowercase());
744 }
745 }
746
747 pub fn add_attribute_anchor(&mut self, anchor: &str) {
749 if !anchor.is_empty() {
750 self.attribute_anchors.insert(anchor.to_lowercase());
751 }
752 }
753
754 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
758 self.anchor_to_heading
759 .get(&anchor.to_lowercase())
760 .and_then(|&idx| self.headings.get(idx))
761 }
762
763 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
765 self.reference_links.push(link);
766 }
767
768 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
773 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
775 return true;
776 }
777
778 if let Some(rules) = self.line_disabled_rules.get(&line)
780 && (rules.contains("*") || rules.contains(rule_name))
781 {
782 return true;
783 }
784
785 if !self.persistent_transitions.is_empty() {
787 let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
788 Ok(i) => Some(i),
789 Err(i) => {
790 if i > 0 {
791 Some(i - 1)
792 } else {
793 None
794 }
795 }
796 };
797 if let Some(i) = idx {
798 let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
799 if disabled.contains("*") {
800 return !enabled.contains(rule_name);
801 }
802 return disabled.contains(rule_name);
803 }
804 }
805
806 false
807 }
808
809 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
811 let is_duplicate = self.cross_file_links.iter().any(|existing| {
814 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
815 });
816 if !is_duplicate {
817 self.cross_file_links.push(link);
818 }
819 }
820
821 pub fn add_defined_reference(&mut self, ref_id: String) {
823 self.defined_references.insert(ref_id);
824 }
825
826 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
828 self.defined_references.contains(ref_id)
829 }
830
831 pub fn hash_matches(&self, hash: &str) -> bool {
833 self.content_hash == hash
834 }
835
836 pub fn heading_count(&self) -> usize {
838 self.headings.len()
839 }
840
841 pub fn reference_link_count(&self) -> usize {
843 self.reference_links.len()
844 }
845}
846
847#[cfg(test)]
848mod tests {
849 use super::*;
850
851 #[test]
852 fn test_workspace_index_basic() {
853 let mut index = WorkspaceIndex::new();
854 assert_eq!(index.file_count(), 0);
855 assert_eq!(index.version(), 0);
856
857 let mut file_index = FileIndex::with_hash("abc123".to_string());
858 file_index.add_heading(HeadingIndex {
859 text: "Installation".to_string(),
860 auto_anchor: "installation".to_string(),
861 custom_anchor: None,
862 line: 1,
863 is_setext: false,
864 });
865
866 index.insert_file(PathBuf::from("docs/install.md"), file_index);
867 assert_eq!(index.file_count(), 1);
868 assert_eq!(index.version(), 1);
869
870 assert!(index.contains_file(Path::new("docs/install.md")));
871 assert!(!index.contains_file(Path::new("docs/other.md")));
872 }
873
874 #[test]
875 fn test_vulnerable_anchors() {
876 let mut index = WorkspaceIndex::new();
877
878 let mut file1 = FileIndex::new();
880 file1.add_heading(HeadingIndex {
881 text: "Getting Started".to_string(),
882 auto_anchor: "getting-started".to_string(),
883 custom_anchor: None,
884 line: 1,
885 is_setext: false,
886 });
887 index.insert_file(PathBuf::from("docs/guide.md"), file1);
888
889 let mut file2 = FileIndex::new();
891 file2.add_heading(HeadingIndex {
892 text: "Installation".to_string(),
893 auto_anchor: "installation".to_string(),
894 custom_anchor: Some("install".to_string()),
895 line: 1,
896 is_setext: false,
897 });
898 index.insert_file(PathBuf::from("docs/install.md"), file2);
899
900 let vulnerable = index.get_vulnerable_anchors();
901 assert_eq!(vulnerable.len(), 1);
902 assert!(vulnerable.contains_key("getting-started"));
903 assert!(!vulnerable.contains_key("installation"));
904
905 let anchors = vulnerable.get("getting-started").unwrap();
906 assert_eq!(anchors.len(), 1);
907 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
908 assert_eq!(anchors[0].text, "Getting Started");
909 }
910
911 #[test]
912 fn test_vulnerable_anchors_multiple_files_same_anchor() {
913 let mut index = WorkspaceIndex::new();
916
917 let mut file1 = FileIndex::new();
919 file1.add_heading(HeadingIndex {
920 text: "Installation".to_string(),
921 auto_anchor: "installation".to_string(),
922 custom_anchor: None,
923 line: 1,
924 is_setext: false,
925 });
926 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
927
928 let mut file2 = FileIndex::new();
930 file2.add_heading(HeadingIndex {
931 text: "Installation".to_string(),
932 auto_anchor: "installation".to_string(),
933 custom_anchor: None,
934 line: 5,
935 is_setext: false,
936 });
937 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
938
939 let mut file3 = FileIndex::new();
941 file3.add_heading(HeadingIndex {
942 text: "Installation".to_string(),
943 auto_anchor: "installation".to_string(),
944 custom_anchor: Some("install".to_string()),
945 line: 10,
946 is_setext: false,
947 });
948 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
949
950 let vulnerable = index.get_vulnerable_anchors();
951 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
953
954 let anchors = vulnerable.get("installation").unwrap();
955 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
957
958 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
960 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
961 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
962 }
963
964 #[test]
965 fn test_file_index_hash() {
966 let index = FileIndex::with_hash("hash123".to_string());
967 assert!(index.hash_matches("hash123"));
968 assert!(!index.hash_matches("other"));
969 }
970
971 #[test]
972 fn test_version_increment() {
973 let mut index = WorkspaceIndex::new();
974 assert_eq!(index.version(), 0);
975
976 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
977 assert_eq!(index.version(), 1);
978
979 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
980 assert_eq!(index.version(), 2);
981
982 index.remove_file(Path::new("a.md"));
983 assert_eq!(index.version(), 3);
984
985 index.remove_file(Path::new("nonexistent.md"));
987 assert_eq!(index.version(), 3);
988 }
989
990 #[test]
991 fn test_reverse_deps_basic() {
992 let mut index = WorkspaceIndex::new();
993
994 let mut file_a = FileIndex::new();
996 file_a.add_cross_file_link(CrossFileLinkIndex {
997 target_path: "b.md".to_string(),
998 fragment: "section".to_string(),
999 line: 10,
1000 column: 5,
1001 });
1002 index.update_file(Path::new("docs/a.md"), file_a);
1003
1004 let dependents = index.get_dependents(Path::new("docs/b.md"));
1006 assert_eq!(dependents.len(), 1);
1007 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
1008
1009 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
1011 assert!(a_dependents.is_empty());
1012 }
1013
1014 #[test]
1015 fn test_reverse_deps_multiple() {
1016 let mut index = WorkspaceIndex::new();
1017
1018 let mut file_a = FileIndex::new();
1020 file_a.add_cross_file_link(CrossFileLinkIndex {
1021 target_path: "../b.md".to_string(),
1022 fragment: "".to_string(),
1023 line: 1,
1024 column: 1,
1025 });
1026 index.update_file(Path::new("docs/sub/a.md"), file_a);
1027
1028 let mut file_c = FileIndex::new();
1029 file_c.add_cross_file_link(CrossFileLinkIndex {
1030 target_path: "b.md".to_string(),
1031 fragment: "".to_string(),
1032 line: 1,
1033 column: 1,
1034 });
1035 index.update_file(Path::new("docs/c.md"), file_c);
1036
1037 let dependents = index.get_dependents(Path::new("docs/b.md"));
1039 assert_eq!(dependents.len(), 2);
1040 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1041 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1042 }
1043
1044 #[test]
1045 fn test_reverse_deps_update_clears_old() {
1046 let mut index = WorkspaceIndex::new();
1047
1048 let mut file_a = FileIndex::new();
1050 file_a.add_cross_file_link(CrossFileLinkIndex {
1051 target_path: "b.md".to_string(),
1052 fragment: "".to_string(),
1053 line: 1,
1054 column: 1,
1055 });
1056 index.update_file(Path::new("docs/a.md"), file_a);
1057
1058 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1060
1061 let mut file_a_updated = FileIndex::new();
1063 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1064 target_path: "c.md".to_string(),
1065 fragment: "".to_string(),
1066 line: 1,
1067 column: 1,
1068 });
1069 index.update_file(Path::new("docs/a.md"), file_a_updated);
1070
1071 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1073
1074 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1076 assert_eq!(c_deps.len(), 1);
1077 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1078 }
1079
1080 #[test]
1081 fn test_reverse_deps_remove_file() {
1082 let mut index = WorkspaceIndex::new();
1083
1084 let mut file_a = FileIndex::new();
1086 file_a.add_cross_file_link(CrossFileLinkIndex {
1087 target_path: "b.md".to_string(),
1088 fragment: "".to_string(),
1089 line: 1,
1090 column: 1,
1091 });
1092 index.update_file(Path::new("docs/a.md"), file_a);
1093
1094 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1096
1097 index.remove_file(Path::new("docs/a.md"));
1099
1100 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1102 }
1103
1104 #[test]
1105 fn test_normalize_path() {
1106 let path = Path::new("docs/sub/../other.md");
1108 let normalized = WorkspaceIndex::normalize_path(path);
1109 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1110
1111 let path2 = Path::new("docs/./other.md");
1113 let normalized2 = WorkspaceIndex::normalize_path(path2);
1114 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1115
1116 let path3 = Path::new("a/b/c/../../d.md");
1118 let normalized3 = WorkspaceIndex::normalize_path(path3);
1119 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1120 }
1121
1122 #[test]
1123 fn test_clear_clears_reverse_deps() {
1124 let mut index = WorkspaceIndex::new();
1125
1126 let mut file_a = FileIndex::new();
1128 file_a.add_cross_file_link(CrossFileLinkIndex {
1129 target_path: "b.md".to_string(),
1130 fragment: "".to_string(),
1131 line: 1,
1132 column: 1,
1133 });
1134 index.update_file(Path::new("docs/a.md"), file_a);
1135
1136 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1138
1139 index.clear();
1141
1142 assert_eq!(index.file_count(), 0);
1144 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1145 }
1146
1147 #[test]
1148 fn test_is_file_stale() {
1149 let mut index = WorkspaceIndex::new();
1150
1151 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1153
1154 let file_index = FileIndex::with_hash("hash123".to_string());
1156 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1157
1158 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1160
1161 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1163 }
1164
1165 #[cfg(feature = "native")]
1166 #[test]
1167 fn test_cache_roundtrip() {
1168 use std::fs;
1169
1170 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1172 let _ = fs::remove_dir_all(&temp_dir);
1173 fs::create_dir_all(&temp_dir).unwrap();
1174
1175 let mut index = WorkspaceIndex::new();
1177
1178 let mut file1 = FileIndex::with_hash("abc123".to_string());
1179 file1.add_heading(HeadingIndex {
1180 text: "Test Heading".to_string(),
1181 auto_anchor: "test-heading".to_string(),
1182 custom_anchor: Some("test".to_string()),
1183 line: 1,
1184 is_setext: false,
1185 });
1186 file1.add_cross_file_link(CrossFileLinkIndex {
1187 target_path: "./other.md".to_string(),
1188 fragment: "section".to_string(),
1189 line: 5,
1190 column: 3,
1191 });
1192 index.update_file(Path::new("docs/file1.md"), file1);
1193
1194 let mut file2 = FileIndex::with_hash("def456".to_string());
1195 file2.add_heading(HeadingIndex {
1196 text: "Another Heading".to_string(),
1197 auto_anchor: "another-heading".to_string(),
1198 custom_anchor: None,
1199 line: 1,
1200 is_setext: false,
1201 });
1202 index.update_file(Path::new("docs/other.md"), file2);
1203
1204 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1206
1207 assert!(temp_dir.join("workspace_index.bin").exists());
1209
1210 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1212
1213 assert_eq!(loaded.file_count(), 2);
1215 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1216 assert!(loaded.contains_file(Path::new("docs/other.md")));
1217
1218 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1220 assert_eq!(file1_loaded.content_hash, "abc123");
1221 assert_eq!(file1_loaded.headings.len(), 1);
1222 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1223 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1224 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1225 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1226
1227 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1229 assert_eq!(dependents.len(), 1);
1230 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1231
1232 let _ = fs::remove_dir_all(&temp_dir);
1234 }
1235
1236 #[cfg(feature = "native")]
1237 #[test]
1238 fn test_cache_missing_file() {
1239 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1240 let _ = std::fs::remove_dir_all(&temp_dir);
1241
1242 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1244 assert!(result.is_none());
1245 }
1246
1247 #[cfg(feature = "native")]
1248 #[test]
1249 fn test_cache_corrupted_file() {
1250 use std::fs;
1251
1252 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1253 let _ = fs::remove_dir_all(&temp_dir);
1254 fs::create_dir_all(&temp_dir).unwrap();
1255
1256 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1258
1259 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1261 assert!(result.is_none());
1262
1263 assert!(!temp_dir.join("workspace_index.bin").exists());
1265
1266 let _ = fs::remove_dir_all(&temp_dir);
1268 }
1269
1270 #[cfg(feature = "native")]
1271 #[test]
1272 fn test_cache_invalid_magic() {
1273 use std::fs;
1274
1275 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1276 let _ = fs::remove_dir_all(&temp_dir);
1277 fs::create_dir_all(&temp_dir).unwrap();
1278
1279 let mut data = Vec::new();
1281 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1285
1286 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1288 assert!(result.is_none());
1289
1290 assert!(!temp_dir.join("workspace_index.bin").exists());
1292
1293 let _ = fs::remove_dir_all(&temp_dir);
1295 }
1296
1297 #[cfg(feature = "native")]
1298 #[test]
1299 fn test_cache_version_mismatch() {
1300 use std::fs;
1301
1302 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1303 let _ = fs::remove_dir_all(&temp_dir);
1304 fs::create_dir_all(&temp_dir).unwrap();
1305
1306 let mut data = Vec::new();
1308 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1312
1313 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1315 assert!(result.is_none());
1316
1317 assert!(!temp_dir.join("workspace_index.bin").exists());
1319
1320 let _ = fs::remove_dir_all(&temp_dir);
1322 }
1323
1324 #[cfg(feature = "native")]
1325 #[test]
1326 fn test_cache_atomic_write() {
1327 use std::fs;
1328
1329 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1331 let _ = fs::remove_dir_all(&temp_dir);
1332 fs::create_dir_all(&temp_dir).unwrap();
1333
1334 let index = WorkspaceIndex::new();
1335 index.save_to_cache(&temp_dir).expect("Failed to save");
1336
1337 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1339 assert_eq!(entries.len(), 1);
1340 assert!(temp_dir.join("workspace_index.bin").exists());
1341
1342 let _ = fs::remove_dir_all(&temp_dir);
1344 }
1345
1346 #[test]
1347 fn test_has_anchor_auto_generated() {
1348 let mut file_index = FileIndex::new();
1349 file_index.add_heading(HeadingIndex {
1350 text: "Installation Guide".to_string(),
1351 auto_anchor: "installation-guide".to_string(),
1352 custom_anchor: None,
1353 line: 1,
1354 is_setext: false,
1355 });
1356
1357 assert!(file_index.has_anchor("installation-guide"));
1359
1360 assert!(file_index.has_anchor("Installation-Guide"));
1362 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1363
1364 assert!(!file_index.has_anchor("nonexistent"));
1366 }
1367
1368 #[test]
1369 fn test_has_anchor_custom() {
1370 let mut file_index = FileIndex::new();
1371 file_index.add_heading(HeadingIndex {
1372 text: "Installation Guide".to_string(),
1373 auto_anchor: "installation-guide".to_string(),
1374 custom_anchor: Some("install".to_string()),
1375 line: 1,
1376 is_setext: false,
1377 });
1378
1379 assert!(file_index.has_anchor("installation-guide"));
1381
1382 assert!(file_index.has_anchor("install"));
1384 assert!(file_index.has_anchor("Install")); assert!(!file_index.has_anchor("nonexistent"));
1388 }
1389
/// `get_heading_by_anchor` resolves both auto-generated and custom anchors to
/// the heading they were registered with, and yields `None` for an anchor
/// that was never added.
#[test]
fn test_get_heading_by_anchor() {
    let mut file_index = FileIndex::new();
    file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: Some("install".to_string()),
        line: 10,
        is_setext: false,
    });
    file_index.add_heading(HeadingIndex {
        text: "Configuration".to_string(),
        auto_anchor: "configuration".to_string(),
        custom_anchor: None,
        line: 20,
        is_setext: false,
    });

    // Lookup through the auto-generated anchor of the first heading.
    let heading = file_index
        .get_heading_by_anchor("installation-guide")
        .expect("auto anchor `installation-guide` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");
    assert_eq!(heading.line, 10);

    // The custom anchor must lead to the very same heading.
    let heading = file_index
        .get_heading_by_anchor("install")
        .expect("custom anchor `install` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");

    // A heading without a custom anchor is still found by its auto anchor.
    let heading = file_index
        .get_heading_by_anchor("configuration")
        .expect("auto anchor `configuration` should resolve to a heading");
    assert_eq!(heading.text, "Configuration");
    assert_eq!(heading.line, 20);

    assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
}
1428
/// Registers 100 headings, each with an auto-generated and a custom anchor,
/// and checks that every anchor is known and that the auto anchor resolves to
/// the heading with the matching line number.
#[test]
fn test_anchor_lookup_many_headings() {
    let mut file_index = FileIndex::new();

    for i in 0..100 {
        file_index.add_heading(HeadingIndex {
            text: format!("Heading {i}"),
            auto_anchor: format!("heading-{i}"),
            custom_anchor: Some(format!("h{i}")),
            line: i + 1,
            is_setext: false,
        });
    }

    for i in 0..100 {
        // Build each anchor once and reuse it for every lookup of this iteration.
        let auto_anchor = format!("heading-{i}");
        let custom_anchor = format!("h{i}");

        assert!(
            file_index.has_anchor(&auto_anchor),
            "auto anchor {auto_anchor:?} should be present"
        );
        assert!(
            file_index.has_anchor(&custom_anchor),
            "custom anchor {custom_anchor:?} should be present"
        );

        // The panic closure only formats its message on the failure path.
        let heading = file_index
            .get_heading_by_anchor(&auto_anchor)
            .unwrap_or_else(|| panic!("no heading found for anchor {auto_anchor:?}"));
        assert_eq!(heading.line, i + 1);
    }
}
1455
/// A single relative Markdown link yields exactly one entry carrying the
/// link target, an empty fragment, and the 1-based line and column at which
/// the target text starts.
#[test]
fn test_extract_cross_file_links_basic() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "# Test\n\nSee [link](./other.md) for info.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);
    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./other.md");
    assert_eq!(link.fragment, "");
    // `./other.md` begins at the 12th character of the third line.
    assert_eq!(link.line, 3);
    assert_eq!(link.column, 12);
}
1475
/// A link of the form `path#fragment` is split into the target path and the
/// fragment, the latter reported without its leading `#`.
#[test]
fn test_extract_cross_file_links_with_fragment() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "Check [guide](./guide.md#install) here.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);
    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./guide.md");
    assert_eq!(link.fragment, "install");
    // `./guide.md` begins at the 15th character of the first line.
    assert_eq!(link.line, 1);
    assert_eq!(link.column, 15);
}
1491
/// Two links on the same line are both reported, in source order, each with
/// the column at which its own target starts.
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "See [a](a.md) and [b](b.md) here.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);

    // (expected target, expected column) for each link, in order of appearance.
    let expected = [("a.md", 9), ("b.md", 23)];
    assert_eq!(found.len(), 2);

    for (link, (target, column)) in found.iter().zip(expected) {
        assert_eq!(link.target_path, target);
        assert_eq!(link.line, 1);
        assert_eq!(link.column, column);
    }
}
1512
/// An angle-bracketed target may contain spaces and parentheses; the reported
/// path excludes the brackets and the column points just past the opening `<`.
#[test]
fn test_extract_cross_file_links_angle_brackets() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "See [link](<path/with (parens).md>) here.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);
    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "path/with (parens).md");
    // The `<` sits at column 12, so the path itself starts at column 13.
    assert_eq!(link.line, 1);
    assert_eq!(link.column, 13);
}
1527
/// Of an `https://` link, a `mailto:` link, a relative Markdown link, a
/// fragment-only link and an absolute-path link, only the relative Markdown
/// link is extracted.
#[test]
fn test_extract_cross_file_links_skips_external() {
    use crate::config::MarkdownFlavor;

    // One link per line; the document starts with an empty line.
    let document = concat!(
        "\n",
        "[external](https://example.com)\n",
        "[mailto](mailto:test@example.com)\n",
        "[local](./local.md)\n",
        "[fragment](#section)\n",
        "[absolute](/docs/page.md)\n",
    );
    let ctx = LintContext::new(document, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./local.md");
}
1546
/// Links to a `.png` and a `.pdf` file are ignored; only the link to the
/// `.md` file is extracted.
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
    use crate::config::MarkdownFlavor;

    // One link per line; the document starts with an empty line.
    let document = concat!(
        "\n",
        "[image](./photo.png)\n",
        "[doc](./readme.md)\n",
        "[pdf](./document.pdf)\n",
    );
    let ctx = LintContext::new(document, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./readme.md");
}
1563
/// Link syntax inside an inline code span is not treated as a link; the
/// ordinary link on the same line is still extracted.
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "Normal [link](./file.md) and `[code](./ignored.md)` here.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./file.md");
}
1576
/// A query string appended to the link target (`?raw=true`) is not part of
/// the reported target path.
#[test]
fn test_extract_cross_file_links_with_query_params() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "See [doc](./file.md?raw=true) here.\n",
        MarkdownFlavor::default(),
        None,
    );
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./file.md");
}
1589
/// An empty document produces no cross-file links.
#[test]
fn test_extract_cross_file_links_empty_content() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new("", MarkdownFlavor::default(), None);

    assert!(extract_cross_file_links(&ctx).is_empty());
}
1600
/// A document consisting only of a heading and plain text produces no
/// cross-file links.
#[test]
fn test_extract_cross_file_links_no_links() {
    use crate::config::MarkdownFlavor;

    let ctx = LintContext::new(
        "# Just a heading\n\nSome text without links.\n",
        MarkdownFlavor::default(),
        None,
    );

    assert!(extract_cross_file_links(&ctx).is_empty());
}
1611
/// Checks the exact line and column reported for two links that sit on
/// different lines of a multi-line document (the test name ties this to
/// issue 234).
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
    use crate::config::MarkdownFlavor;

    let document = concat!(
        "# Test Document\n",
        "\n",
        "Here is a [broken link](nonexistent-file.md) that should trigger MD057.\n",
        "\n",
        "And another [link](also-missing.md) on this line.\n",
    );
    let ctx = LintContext::new(document, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&ctx);

    // (target, line, column) — the column is where the target text begins.
    let expected = [("nonexistent-file.md", 3, 25), ("also-missing.md", 5, 20)];
    assert_eq!(found.len(), 2);

    for (link, (target, line, column)) in found.iter().zip(expected) {
        assert_eq!(link.target_path, target);
        assert_eq!(link.line, line);
        assert_eq!(link.column, column);
    }
}
1639}