1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
/// Matches the opening of an inline link or image: an optional `!` followed by
/// a bracketed span that contains no `]` (for example `[text]` or `![alt]`).
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
40
/// Extracts an angle-bracketed link destination, i.e. `](<target>)`.
///
/// Group 1 is the text between `<` and `>`; group 2 is an optional `#fragment`
/// written directly after the closing `>`. An optional double-quoted title may
/// appear before the final `)`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
45
/// Extracts a plain (non angle-bracketed) link destination, i.e. `](target)`.
///
/// Group 1 is a run of characters excluding `>`, `)`, whitespace and `#`;
/// group 2 is an optional `#fragment` that follows it. An optional
/// double-quoted title may appear before the final `)`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
50
/// Matches targets that begin with `scheme://`, a bare `scheme:` (such as
/// `mailto:`), or `www.`; the link extractor uses it to skip such targets.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
54
/// File-name suffixes treated as Markdown.
///
/// Entries are lowercase and include the leading dot because
/// `is_markdown_file` compares them against a lowercased path with `ends_with`.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
67
68#[inline]
70fn is_markdown_file(path: &str) -> bool {
71 let path_lower = path.to_lowercase();
72 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
73}
74
/// Returns `url` cut off at the first `?` or `#`, whichever occurs first.
/// The input is returned unchanged when it contains neither character.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|c: char| c == '?' || c == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
88
89pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
97 let content = ctx.content;
98
99 if content.is_empty() || !content.contains("](") {
101 return Vec::new();
102 }
103
104 let mut links = Vec::new();
105 let lines: Vec<&str> = content.lines().collect();
106 let element_cache = ElementCache::new(content);
107 let line_index = &ctx.line_index;
108
109 let mut processed_lines = HashSet::new();
112
113 for link in &ctx.links {
114 let line_idx = link.line - 1;
115 if line_idx >= lines.len() {
116 continue;
117 }
118
119 if !processed_lines.insert(line_idx) {
121 continue;
122 }
123
124 let line = lines[line_idx];
125 if !line.contains("](") {
126 continue;
127 }
128
129 for link_match in LINK_START_REGEX.find_iter(line) {
131 let start_pos = link_match.start();
132 let end_pos = link_match.end();
133
134 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
136 let absolute_start_pos = line_start_byte + start_pos;
137
138 if element_cache.is_in_code_span(absolute_start_pos) {
140 continue;
141 }
142
143 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
146 .captures_at(line, end_pos - 1)
147 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
148
149 if let Some(caps) = caps_result
150 && let Some(url_group) = caps.get(1)
151 {
152 let file_path = url_group.as_str().trim();
153
154 if file_path.is_empty()
157 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
158 || file_path.starts_with("www.")
159 || file_path.starts_with('#')
160 || file_path.starts_with("{{")
161 || file_path.starts_with("{%")
162 || file_path.starts_with('/')
163 || file_path.starts_with('~')
164 || file_path.starts_with('@')
165 {
166 continue;
167 }
168
169 let file_path = strip_query_and_fragment(file_path);
171
172 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
174
175 if is_markdown_file(file_path) {
177 links.push(CrossFileLinkIndex {
178 target_path: file_path.to_string(),
179 fragment: fragment.to_string(),
180 line: link.line,
181 column: url_group.start() + 1,
182 });
183 }
184 }
185 }
186 }
187
188 links
189}
190
/// Four magic bytes written at the start of the cache file and checked on load.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";
194
/// Cache format version stored (little-endian) after the magic bytes; a cache
/// file with a different value is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;
198
/// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
202
/// Index of all tracked files plus the link graph between them.
///
/// NOTE(review): the whole struct is serialized by the cache routines, so the
/// field order is presumably part of the on-disk format — confirm before
/// reordering fields.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Reverse dependency graph: resolved link target -> files that link to it.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Change counter, incremented (wrapping) by the mutating methods.
    version: u64,
}
217
/// Index data collected for a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings in the order they were added through `add_heading`.
    pub headings: Vec<HeadingIndex>,
    /// Reference link usages added through `add_reference_link`.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links to other files; `add_cross_file_link` skips entries that repeat
    /// an existing target, fragment and line.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference identifiers registered through `add_defined_reference`.
    pub defined_references: HashSet<String>,
    /// Content hash associated with this index; compared against a current
    /// hash to detect stale entries.
    pub content_hash: String,
    /// Lowercased anchor (auto-generated and custom) -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors registered through `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors registered through `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rule names disabled for the whole file; `*` disables every rule.
    pub file_disabled_rules: HashSet<String>,
    /// Rule names disabled on individual lines, keyed by line number; `*`
    /// disables every rule on that line.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
249
/// A heading recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Automatically generated anchor; may be empty.
    pub auto_anchor: String,
    /// Explicitly assigned anchor, if the heading has one.
    pub custom_anchor: Option<String>,
    /// Line of the heading (presumably 1-based — TODO confirm).
    pub line: usize,
}
262
/// A reference link usage recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier the link refers to.
    pub reference_id: String,
    /// Line of the usage (presumably 1-based — TODO confirm).
    pub line: usize,
    /// Column of the usage (presumably 1-based — TODO confirm).
    pub column: usize,
}
273
/// A link from one file to another Markdown file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Target path as written in the link, without query string or fragment.
    pub target_path: String,
    /// Fragment without the leading `#`; empty when the link has none.
    pub fragment: String,
    /// Line number of the link.
    pub line: usize,
    /// Byte offset of the target within its line, plus one.
    pub column: usize,
}
286
/// A heading that is reachable only through its auto-generated anchor, as
/// reported by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File that contains the heading.
    pub file: PathBuf,
    /// Line of the heading.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
297
298impl WorkspaceIndex {
/// Creates an empty index (equivalent to `Default::default()`).
pub fn new() -> Self {
    Self::default()
}
303
/// Returns the current change counter.
pub fn version(&self) -> u64 {
    self.version
}
308
/// Returns the number of indexed files.
pub fn file_count(&self) -> usize {
    self.files.len()
}
313
/// Returns `true` when `path` has an entry in the index.
pub fn contains_file(&self, path: &Path) -> bool {
    self.files.contains_key(path)
}
318
/// Returns the index data stored for `path`, if any.
pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
    self.files.get(path)
}
323
324 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
326 self.files.insert(path, index);
327 self.version = self.version.wrapping_add(1);
328 }
329
330 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
332 self.clear_reverse_deps_for(path);
334
335 let result = self.files.remove(path);
336 if result.is_some() {
337 self.version = self.version.wrapping_add(1);
338 }
339 result
340 }
341
342 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
352 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
353
354 for (file_path, file_index) in &self.files {
355 for heading in &file_index.headings {
356 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
358 let anchor_key = heading.auto_anchor.to_lowercase();
359 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
360 file: file_path.clone(),
361 line: heading.line,
362 text: heading.text.clone(),
363 });
364 }
365 }
366 }
367
368 vulnerable
369 }
370
371 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
373 self.files
374 .iter()
375 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
376 }
377
378 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
380 self.files.iter().map(|(p, i)| (p.as_path(), i))
381 }
382
383 pub fn clear(&mut self) {
385 self.files.clear();
386 self.reverse_deps.clear();
387 self.version = self.version.wrapping_add(1);
388 }
389
390 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
397 self.clear_reverse_deps_as_source(path);
400
401 for link in &index.cross_file_links {
403 let target = self.resolve_target_path(path, &link.target_path);
404 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
405 }
406
407 self.files.insert(path.to_path_buf(), index);
408 self.version = self.version.wrapping_add(1);
409 }
410
411 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
416 self.reverse_deps
417 .get(path)
418 .map(|set| set.iter().cloned().collect())
419 .unwrap_or_default()
420 }
421
422 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
426 self.files
427 .get(path)
428 .map(|f| f.content_hash != current_hash)
429 .unwrap_or(true)
430 }
431
432 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
437 let before_count = self.files.len();
438
439 let to_remove: Vec<PathBuf> = self
441 .files
442 .keys()
443 .filter(|path| !current_files.contains(*path))
444 .cloned()
445 .collect();
446
447 for path in &to_remove {
449 self.remove_file(path);
450 }
451
452 before_count - self.files.len()
453 }
454
/// Serializes the index into `cache_dir`, creating the directory if needed.
///
/// File layout: the 4 magic bytes, the format version as little-endian `u32`,
/// then the postcard-encoded index. The data is written to a process-specific
/// temporary file and renamed over the final name so readers never see a
/// partially written cache.
///
/// # Errors
///
/// Returns an `InvalidData` error when encoding fails, or the underlying I/O
/// error when creating the directory, writing, syncing or renaming fails. On
/// a write/sync/rename failure the temporary file is removed (best effort).
#[cfg(feature = "native")]
pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
    use std::fs;
    use std::io::Write;

    fs::create_dir_all(cache_dir)?;

    let encoded = postcard::to_allocvec(self)
        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;

    // 8 = magic (4 bytes) + format version (4 bytes).
    let mut cache_data = Vec::with_capacity(8 + encoded.len());
    cache_data.extend_from_slice(CACHE_MAGIC);
    cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
    cache_data.extend_from_slice(&encoded);

    let final_path = cache_dir.join(CACHE_FILE_NAME);
    let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));

    let write_result = (|| -> std::io::Result<()> {
        let mut file = fs::File::create(&temp_path)?;
        file.write_all(&cache_data)?;
        file.sync_all()?;
        // Close the handle before renaming.
        drop(file);
        fs::rename(&temp_path, &final_path)
    })();

    if let Err(err) = write_result {
        // Do not leave a stray temp file behind; ignore cleanup failures so
        // the original error is the one reported.
        let _ = fs::remove_file(&temp_path);
        return Err(err);
    }

    log::debug!(
        "Saved workspace index to cache: {} files, {} bytes (format v{})",
        self.files.len(),
        cache_data.len(),
        CACHE_FORMAT_VERSION
    );

    Ok(())
}
502
/// Loads an index previously written by `save_to_cache` from `cache_dir`.
///
/// Returns `None` when the cache file cannot be read, is shorter than the
/// 8-byte header, has the wrong magic bytes, has a different format version,
/// or fails to deserialize. In every case except the unreadable one the cache
/// file is deleted (best effort).
#[cfg(feature = "native")]
pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
    use std::fs;

    let cache_path = cache_dir.join(CACHE_FILE_NAME);
    let bytes = fs::read(&cache_path).ok()?;

    // Best-effort deletion of a cache file that cannot be used.
    let discard = || {
        let _ = fs::remove_file(&cache_path);
    };

    if bytes.len() < 8 {
        log::warn!("Workspace index cache too small, discarding");
        discard();
        return None;
    }

    let (header, payload) = bytes.split_at(8);
    let (magic, version_bytes) = header.split_at(4);

    if magic != CACHE_MAGIC {
        log::warn!("Workspace index cache has invalid magic header, discarding");
        discard();
        return None;
    }

    let version = u32::from_le_bytes([
        version_bytes[0],
        version_bytes[1],
        version_bytes[2],
        version_bytes[3],
    ]);
    if version != CACHE_FORMAT_VERSION {
        log::info!(
            "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
        );
        discard();
        return None;
    }

    let index = match postcard::from_bytes::<Self>(payload) {
        Ok(index) => index,
        Err(e) => {
            log::warn!("Failed to deserialize workspace index cache: {e}");
            discard();
            return None;
        }
    };

    log::debug!(
        "Loaded workspace index from cache: {} files (format v{})",
        index.files.len(),
        version
    );
    Some(index)
}
558
559 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
564 for deps in self.reverse_deps.values_mut() {
565 deps.remove(path);
566 }
567 self.reverse_deps.retain(|_, deps| !deps.is_empty());
569 }
570
571 fn clear_reverse_deps_for(&mut self, path: &Path) {
576 self.clear_reverse_deps_as_source(path);
578
579 self.reverse_deps.remove(path);
581 }
582
583 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
585 let source_dir = source_file.parent().unwrap_or(Path::new(""));
587
588 let target = source_dir.join(relative_target);
590
591 Self::normalize_path(&target)
593 }
594
/// Lexically normalizes `path`: drops `.` components and resolves `..`
/// against the preceding normal component. The file system is not consulted.
///
/// A `..` that has nothing to cancel is handled explicitly instead of being
/// dropped:
/// - at the start of a relative path (or after another kept `..`) it is
///   preserved, so `../b.md` stays `../b.md` rather than turning into `b.md`;
/// - directly after a root or prefix component it is ignored, so `/../x`
///   becomes `/x` and the root is never popped.
fn normalize_path(path: &Path) -> PathBuf {
    use std::path::Component;

    let mut components: Vec<Component<'_>> = Vec::new();

    for component in path.components() {
        match component {
            Component::CurDir => {}
            Component::ParentDir => match components.last() {
                // `dir/..` cancels out.
                Some(Component::Normal(_)) => {
                    components.pop();
                }
                // Cannot go above the root / prefix.
                Some(Component::RootDir | Component::Prefix(_)) => {}
                // Nothing to cancel: keep the `..` so the path still points
                // outside its base directory.
                Some(Component::ParentDir | Component::CurDir) | None => components.push(component),
            },
            Component::Normal(_) | Component::RootDir | Component::Prefix(_) => {
                components.push(component);
            }
        }
    }

    components.iter().collect()
}
618}
619
620impl FileIndex {
/// Creates an empty file index (equivalent to `Default::default()`).
pub fn new() -> Self {
    Self::default()
}
625
626 pub fn with_hash(content_hash: String) -> Self {
628 Self {
629 content_hash,
630 ..Default::default()
631 }
632 }
633
634 pub fn add_heading(&mut self, heading: HeadingIndex) {
638 let index = self.headings.len();
639
640 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
642
643 if let Some(ref custom) = heading.custom_anchor {
645 self.anchor_to_heading.insert(custom.to_lowercase(), index);
646 }
647
648 self.headings.push(heading);
649 }
650
651 pub fn has_anchor(&self, anchor: &str) -> bool {
661 let lower = anchor.to_lowercase();
662 self.anchor_to_heading.contains_key(&lower)
663 || self.html_anchors.contains(&lower)
664 || self.attribute_anchors.contains(&lower)
665 }
666
667 pub fn add_html_anchor(&mut self, anchor: String) {
669 if !anchor.is_empty() {
670 self.html_anchors.insert(anchor.to_lowercase());
671 }
672 }
673
674 pub fn add_attribute_anchor(&mut self, anchor: String) {
676 if !anchor.is_empty() {
677 self.attribute_anchors.insert(anchor.to_lowercase());
678 }
679 }
680
681 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
685 self.anchor_to_heading
686 .get(&anchor.to_lowercase())
687 .and_then(|&idx| self.headings.get(idx))
688 }
689
/// Appends a reference link usage; no de-duplication is performed.
pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
    self.reference_links.push(link);
}
694
695 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
700 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
702 return true;
703 }
704
705 if let Some(rules) = self.line_disabled_rules.get(&line) {
707 return rules.contains("*") || rules.contains(rule_name);
708 }
709
710 false
711 }
712
713 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
715 let is_duplicate = self.cross_file_links.iter().any(|existing| {
718 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
719 });
720 if !is_duplicate {
721 self.cross_file_links.push(link);
722 }
723 }
724
/// Records `ref_id` as a defined reference; the identifier is stored exactly
/// as given (no case folding).
pub fn add_defined_reference(&mut self, ref_id: String) {
    self.defined_references.insert(ref_id);
}
729
/// Returns `true` when `ref_id` was recorded as a defined reference (exact,
/// case-sensitive match).
pub fn has_defined_reference(&self, ref_id: &str) -> bool {
    self.defined_references.contains(ref_id)
}
734
/// Returns `true` when the stored content hash equals `hash`.
pub fn hash_matches(&self, hash: &str) -> bool {
    self.content_hash == hash
}
739
/// Returns the number of headings stored for this file.
pub fn heading_count(&self) -> usize {
    self.headings.len()
}
744
/// Returns the number of reference link usages stored for this file.
pub fn reference_link_count(&self) -> usize {
    self.reference_links.len()
}
749}
750
751#[cfg(test)]
752mod tests {
753 use super::*;
754
#[test]
fn test_workspace_index_basic() {
    // A fresh index is empty and starts at version 0.
    let mut workspace = WorkspaceIndex::new();
    assert_eq!(workspace.file_count(), 0);
    assert_eq!(workspace.version(), 0);

    let mut install_doc = FileIndex::with_hash(String::from("abc123"));
    install_doc.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: None,
        line: 1,
    });

    // Inserting a file bumps both the count and the version.
    workspace.insert_file(PathBuf::from("docs/install.md"), install_doc);
    assert_eq!(workspace.file_count(), 1);
    assert_eq!(workspace.version(), 1);

    assert!(workspace.contains_file(Path::new("docs/install.md")));
    assert!(!workspace.contains_file(Path::new("docs/other.md")));
}
776
#[test]
fn test_vulnerable_anchors() {
    let mut workspace = WorkspaceIndex::new();

    // Heading without a custom anchor: expected to be reported.
    let mut guide = FileIndex::new();
    guide.add_heading(HeadingIndex {
        text: String::from("Getting Started"),
        auto_anchor: String::from("getting-started"),
        custom_anchor: None,
        line: 1,
    });
    workspace.insert_file(PathBuf::from("docs/guide.md"), guide);

    // Heading with a custom anchor: must not be reported.
    let mut install = FileIndex::new();
    install.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: Some(String::from("install")),
        line: 1,
    });
    workspace.insert_file(PathBuf::from("docs/install.md"), install);

    let report = workspace.get_vulnerable_anchors();
    assert_eq!(report.len(), 1);
    assert!(report.contains_key("getting-started"));
    assert!(!report.contains_key("installation"));

    let entries = report.get("getting-started").unwrap();
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].file, PathBuf::from("docs/guide.md"));
    assert_eq!(entries[0].text, "Getting Started");
}
811
#[test]
fn test_vulnerable_anchors_multiple_files_same_anchor() {
    let mut workspace = WorkspaceIndex::new();

    // Two translations share the same auto-generated anchor...
    let mut english = FileIndex::new();
    english.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: None,
        line: 1,
    });
    workspace.insert_file(PathBuf::from("docs/en/guide.md"), english);

    let mut french = FileIndex::new();
    french.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: None,
        line: 5,
    });
    workspace.insert_file(PathBuf::from("docs/fr/guide.md"), french);

    // ...while the third one is protected by a custom anchor.
    let mut german = FileIndex::new();
    german.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: Some(String::from("install")),
        line: 10,
    });
    workspace.insert_file(PathBuf::from("docs/de/guide.md"), german);

    let report = workspace.get_vulnerable_anchors();
    assert_eq!(report.len(), 1);
    assert!(report.contains_key("installation"));

    let entries = report.get("installation").unwrap();
    assert_eq!(entries.len(), 2, "Should collect both vulnerable anchors");

    let reported_files: std::collections::HashSet<_> = entries.iter().map(|entry| &entry.file).collect();
    assert!(reported_files.contains(&PathBuf::from("docs/en/guide.md")));
    assert!(reported_files.contains(&PathBuf::from("docs/fr/guide.md")));
}
861
#[test]
fn test_file_index_hash() {
    // The stored hash matches only the exact same string.
    let hashed = FileIndex::with_hash(String::from("hash123"));
    assert!(hashed.hash_matches("hash123"));
    assert!(!hashed.hash_matches("other"));
}
868
#[test]
fn test_version_increment() {
    let mut workspace = WorkspaceIndex::new();
    assert_eq!(workspace.version(), 0);

    // Every insertion bumps the version.
    workspace.insert_file(PathBuf::from("a.md"), FileIndex::new());
    assert_eq!(workspace.version(), 1);

    workspace.insert_file(PathBuf::from("b.md"), FileIndex::new());
    assert_eq!(workspace.version(), 2);

    // Removing an existing file bumps it too.
    workspace.remove_file(Path::new("a.md"));
    assert_eq!(workspace.version(), 3);

    // Removing an unknown file leaves the version untouched.
    workspace.remove_file(Path::new("nonexistent.md"));
    assert_eq!(workspace.version(), 3);
}
887
#[test]
fn test_reverse_deps_basic() {
    let mut workspace = WorkspaceIndex::new();

    // docs/a.md links to b.md in the same directory.
    let mut source = FileIndex::new();
    source.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::from("section"),
        line: 10,
        column: 5,
    });
    workspace.update_file(Path::new("docs/a.md"), source);

    // The target knows its dependent...
    let b_dependents = workspace.get_dependents(Path::new("docs/b.md"));
    assert_eq!(b_dependents.len(), 1);
    assert_eq!(b_dependents[0], PathBuf::from("docs/a.md"));

    // ...and the source itself has none.
    let a_dependents = workspace.get_dependents(Path::new("docs/a.md"));
    assert!(a_dependents.is_empty());
}
911
#[test]
fn test_reverse_deps_multiple() {
    let mut workspace = WorkspaceIndex::new();

    // A file in a sub-directory reaches docs/b.md through `..`.
    let mut nested = FileIndex::new();
    nested.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("../b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/sub/a.md"), nested);

    // A sibling file links to it directly.
    let mut sibling = FileIndex::new();
    sibling.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/c.md"), sibling);

    let b_dependents = workspace.get_dependents(Path::new("docs/b.md"));
    assert_eq!(b_dependents.len(), 2);
    assert!(b_dependents.contains(&PathBuf::from("docs/sub/a.md")));
    assert!(b_dependents.contains(&PathBuf::from("docs/c.md")));
}
941
#[test]
fn test_reverse_deps_update_clears_old() {
    let mut workspace = WorkspaceIndex::new();

    // First revision of docs/a.md links to b.md.
    let mut first_revision = FileIndex::new();
    first_revision.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/a.md"), first_revision);

    assert_eq!(workspace.get_dependents(Path::new("docs/b.md")).len(), 1);

    // Second revision links to c.md instead.
    let mut second_revision = FileIndex::new();
    second_revision.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("c.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/a.md"), second_revision);

    // The old edge is gone and the new one exists.
    assert!(workspace.get_dependents(Path::new("docs/b.md")).is_empty());

    let c_dependents = workspace.get_dependents(Path::new("docs/c.md"));
    assert_eq!(c_dependents.len(), 1);
    assert_eq!(c_dependents[0], PathBuf::from("docs/a.md"));
}
977
#[test]
fn test_reverse_deps_remove_file() {
    let mut workspace = WorkspaceIndex::new();

    let mut source = FileIndex::new();
    source.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/a.md"), source);

    assert_eq!(workspace.get_dependents(Path::new("docs/b.md")).len(), 1);

    // Removing the source file also removes the edge it contributed.
    workspace.remove_file(Path::new("docs/a.md"));

    assert!(workspace.get_dependents(Path::new("docs/b.md")).is_empty());
}
1001
#[test]
fn test_normalize_path() {
    // `..` cancels the preceding directory.
    let with_parent = WorkspaceIndex::normalize_path(Path::new("docs/sub/../other.md"));
    assert_eq!(with_parent, PathBuf::from("docs/other.md"));

    // `.` is dropped.
    let with_current = WorkspaceIndex::normalize_path(Path::new("docs/./other.md"));
    assert_eq!(with_current, PathBuf::from("docs/other.md"));

    // Several `..` in a row are resolved one by one.
    let with_two_parents = WorkspaceIndex::normalize_path(Path::new("a/b/c/../../d.md"));
    assert_eq!(with_two_parents, PathBuf::from("a/d.md"));
}
1019
#[test]
fn test_clear_clears_reverse_deps() {
    let mut workspace = WorkspaceIndex::new();

    let mut source = FileIndex::new();
    source.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });
    workspace.update_file(Path::new("docs/a.md"), source);

    assert_eq!(workspace.get_dependents(Path::new("docs/b.md")).len(), 1);

    workspace.clear();

    // Both the files and the dependency graph are gone.
    assert_eq!(workspace.file_count(), 0);
    assert!(workspace.get_dependents(Path::new("docs/b.md")).is_empty());
}
1044
#[test]
fn test_is_file_stale() {
    let mut workspace = WorkspaceIndex::new();

    // Unknown files are always stale.
    assert!(workspace.is_file_stale(Path::new("nonexistent.md"), "hash123"));

    let indexed = FileIndex::with_hash(String::from("hash123"));
    workspace.insert_file(PathBuf::from("docs/test.md"), indexed);

    // Same hash: fresh. Different hash: stale.
    assert!(!workspace.is_file_stale(Path::new("docs/test.md"), "hash123"));
    assert!(workspace.is_file_stale(Path::new("docs/test.md"), "different_hash"));
}
1062
#[cfg(feature = "native")]
#[test]
fn test_cache_roundtrip() {
    use std::fs;

    // Start from a clean scratch directory.
    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
    let _ = fs::remove_dir_all(&cache_dir);
    fs::create_dir_all(&cache_dir).unwrap();

    let mut original = WorkspaceIndex::new();

    let mut first = FileIndex::with_hash(String::from("abc123"));
    first.add_heading(HeadingIndex {
        text: String::from("Test Heading"),
        auto_anchor: String::from("test-heading"),
        custom_anchor: Some(String::from("test")),
        line: 1,
    });
    first.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("./other.md"),
        fragment: String::from("section"),
        line: 5,
        column: 3,
    });
    original.update_file(Path::new("docs/file1.md"), first);

    let mut second = FileIndex::with_hash(String::from("def456"));
    second.add_heading(HeadingIndex {
        text: String::from("Another Heading"),
        auto_anchor: String::from("another-heading"),
        custom_anchor: None,
        line: 1,
    });
    original.update_file(Path::new("docs/other.md"), second);

    // Save, check the file exists, then load it back.
    original.save_to_cache(&cache_dir).expect("Failed to save cache");
    assert!(cache_dir.join("workspace_index.bin").exists());

    let restored = WorkspaceIndex::load_from_cache(&cache_dir).expect("Failed to load cache");

    assert_eq!(restored.file_count(), 2);
    assert!(restored.contains_file(Path::new("docs/file1.md")));
    assert!(restored.contains_file(Path::new("docs/other.md")));

    // File contents survive the round trip.
    let restored_first = restored.get_file(Path::new("docs/file1.md")).unwrap();
    assert_eq!(restored_first.content_hash, "abc123");
    assert_eq!(restored_first.headings.len(), 1);
    assert_eq!(restored_first.headings[0].text, "Test Heading");
    assert_eq!(restored_first.headings[0].custom_anchor, Some("test".to_string()));
    assert_eq!(restored_first.cross_file_links.len(), 1);
    assert_eq!(restored_first.cross_file_links[0].target_path, "./other.md");

    // So does the reverse dependency graph.
    let other_dependents = restored.get_dependents(Path::new("docs/other.md"));
    assert_eq!(other_dependents.len(), 1);
    assert_eq!(other_dependents[0], PathBuf::from("docs/file1.md"));

    let _ = fs::remove_dir_all(&cache_dir);
}
1131
#[cfg(feature = "native")]
#[test]
fn test_cache_missing_file() {
    // Make sure the directory does not exist at all.
    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
    let _ = std::fs::remove_dir_all(&cache_dir);

    // Loading from a missing cache yields nothing rather than failing.
    let loaded = WorkspaceIndex::load_from_cache(&cache_dir);
    assert!(loaded.is_none());
}
1142
#[cfg(feature = "native")]
#[test]
fn test_cache_corrupted_file() {
    use std::fs;

    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
    let _ = fs::remove_dir_all(&cache_dir);
    fs::create_dir_all(&cache_dir).unwrap();

    // Three bytes are too short to even hold the header.
    let cache_file = cache_dir.join("workspace_index.bin");
    fs::write(&cache_file, b"bad").unwrap();

    let loaded = WorkspaceIndex::load_from_cache(&cache_dir);
    assert!(loaded.is_none());

    // The unusable file is deleted.
    assert!(!cache_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&cache_dir);
}
1165
#[cfg(feature = "native")]
#[test]
fn test_cache_invalid_magic() {
    use std::fs;

    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
    let _ = fs::remove_dir_all(&cache_dir);
    fs::create_dir_all(&cache_dir).unwrap();

    // Wrong magic bytes, then a version and some payload.
    let mut bogus = Vec::new();
    bogus.extend_from_slice(b"XXXX");
    bogus.extend_from_slice(&1u32.to_le_bytes());
    bogus.extend_from_slice(&[0; 100]);
    fs::write(cache_dir.join("workspace_index.bin"), &bogus).unwrap();

    let loaded = WorkspaceIndex::load_from_cache(&cache_dir);
    assert!(loaded.is_none());

    // The unusable file is deleted.
    assert!(!cache_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&cache_dir);
}
1192
#[cfg(feature = "native")]
#[test]
fn test_cache_version_mismatch() {
    use std::fs;

    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
    let _ = fs::remove_dir_all(&cache_dir);
    fs::create_dir_all(&cache_dir).unwrap();

    // Correct magic bytes but a format version that is not the current one.
    let mut outdated = Vec::new();
    outdated.extend_from_slice(b"RWSI");
    outdated.extend_from_slice(&999u32.to_le_bytes());
    outdated.extend_from_slice(&[0; 100]);
    fs::write(cache_dir.join("workspace_index.bin"), &outdated).unwrap();

    let loaded = WorkspaceIndex::load_from_cache(&cache_dir);
    assert!(loaded.is_none());

    // The unusable file is deleted.
    assert!(!cache_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&cache_dir);
}
1219
#[cfg(feature = "native")]
#[test]
fn test_cache_atomic_write() {
    use std::fs;

    let cache_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
    let _ = fs::remove_dir_all(&cache_dir);
    fs::create_dir_all(&cache_dir).unwrap();

    let empty_index = WorkspaceIndex::new();
    empty_index.save_to_cache(&cache_dir).expect("Failed to save");

    // Only the final cache file may remain; the temp file must be gone.
    let remaining: Vec<_> = fs::read_dir(&cache_dir).unwrap().collect();
    assert_eq!(remaining.len(), 1);
    assert!(cache_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&cache_dir);
}
1241
#[test]
fn test_has_anchor_auto_generated() {
    let mut doc = FileIndex::new();
    doc.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: None,
        line: 1,
    });

    // Exact match.
    assert!(doc.has_anchor("installation-guide"));

    // Lookup is case-insensitive.
    assert!(doc.has_anchor("Installation-Guide"));
    assert!(doc.has_anchor("INSTALLATION-GUIDE"));

    assert!(!doc.has_anchor("nonexistent"));
}
1262
#[test]
fn test_has_anchor_custom() {
    let mut doc = FileIndex::new();
    doc.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: Some(String::from("install")),
        line: 1,
    });

    // The auto-generated anchor stays valid next to the custom one.
    assert!(doc.has_anchor("installation-guide"));

    // The custom anchor is found, case-insensitively.
    assert!(doc.has_anchor("install"));
    assert!(doc.has_anchor("Install"));

    assert!(!doc.has_anchor("nonexistent"));
}
1283
#[test]
fn test_get_heading_by_anchor() {
    let mut doc = FileIndex::new();
    doc.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: Some(String::from("install")),
        line: 10,
    });
    doc.add_heading(HeadingIndex {
        text: String::from("Configuration"),
        auto_anchor: String::from("configuration"),
        custom_anchor: None,
        line: 20,
    });

    // Lookup through the auto-generated anchor.
    let by_auto = doc.get_heading_by_anchor("installation-guide");
    assert!(by_auto.is_some());
    assert_eq!(by_auto.unwrap().text, "Installation Guide");
    assert_eq!(by_auto.unwrap().line, 10);

    // Lookup through the custom anchor reaches the same heading.
    let by_custom = doc.get_heading_by_anchor("install");
    assert!(by_custom.is_some());
    assert_eq!(by_custom.unwrap().text, "Installation Guide");

    // Second heading.
    let config = doc.get_heading_by_anchor("configuration");
    assert!(config.is_some());
    assert_eq!(config.unwrap().text, "Configuration");
    assert_eq!(config.unwrap().line, 20);

    assert!(doc.get_heading_by_anchor("nonexistent").is_none());
}
1320
#[test]
fn test_anchor_lookup_many_headings() {
    let mut doc = FileIndex::new();

    // Register 100 headings, each with an auto and a custom anchor.
    for n in 0..100 {
        doc.add_heading(HeadingIndex {
            text: format!("Heading {n}"),
            auto_anchor: format!("heading-{n}"),
            custom_anchor: Some(format!("h{n}")),
            line: n + 1,
        });
    }

    // Every anchor must resolve, and to the right heading.
    for n in 0..100 {
        let auto = format!("heading-{n}");
        let custom = format!("h{n}");
        assert!(doc.has_anchor(&auto));
        assert!(doc.has_anchor(&custom));

        let found = doc.get_heading_by_anchor(&auto);
        assert!(found.is_some());
        assert_eq!(found.unwrap().line, n + 1);
    }
}
1346
#[test]
fn test_extract_cross_file_links_basic() {
    use crate::config::MarkdownFlavor;

    let markdown = "# Test\n\nSee [link](./other.md) for info.\n";
    let ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    let only = &found[0];
    assert_eq!(only.target_path, "./other.md");
    assert_eq!(only.fragment, "");
    assert_eq!(only.line, 3);
    // Column points at the start of the target, not at the `[`.
    assert_eq!(only.column, 12);
}
1366
#[test]
fn test_extract_cross_file_links_with_fragment() {
    use crate::config::MarkdownFlavor;

    let markdown = "Check [guide](./guide.md#install) here.\n";
    let ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&ctx);

    assert_eq!(found.len(), 1);
    let only = &found[0];
    // Path and fragment are split; the fragment loses its `#`.
    assert_eq!(only.target_path, "./guide.md");
    assert_eq!(only.fragment, "install");
    assert_eq!(only.line, 1);
    assert_eq!(only.column, 15);
}
1382
1383 #[test]
1384 fn test_extract_cross_file_links_multiple_on_same_line() {
1385 use crate::config::MarkdownFlavor;
1386
1387 let content = "See [a](a.md) and [b](b.md) here.\n";
1388 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1389 let links = extract_cross_file_links(&ctx);
1390
1391 assert_eq!(links.len(), 2);
1392
1393 assert_eq!(links[0].target_path, "a.md");
1394 assert_eq!(links[0].line, 1);
1395 assert_eq!(links[0].column, 9);
1397
1398 assert_eq!(links[1].target_path, "b.md");
1399 assert_eq!(links[1].line, 1);
1400 assert_eq!(links[1].column, 23);
1402 }
1403
1404 #[test]
1405 fn test_extract_cross_file_links_angle_brackets() {
1406 use crate::config::MarkdownFlavor;
1407
1408 let content = "See [link](<path/with (parens).md>) here.\n";
1409 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1410 let links = extract_cross_file_links(&ctx);
1411
1412 assert_eq!(links.len(), 1);
1413 assert_eq!(links[0].target_path, "path/with (parens).md");
1414 assert_eq!(links[0].line, 1);
1415 assert_eq!(links[0].column, 13);
1417 }
1418
1419 #[test]
1420 fn test_extract_cross_file_links_skips_external() {
1421 use crate::config::MarkdownFlavor;
1422
1423 let content = r#"
1424[external](https://example.com)
1425[mailto](mailto:test@example.com)
1426[local](./local.md)
1427[fragment](#section)
1428[absolute](/docs/page.md)
1429"#;
1430 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1431 let links = extract_cross_file_links(&ctx);
1432
1433 assert_eq!(links.len(), 1);
1435 assert_eq!(links[0].target_path, "./local.md");
1436 }
1437
1438 #[test]
1439 fn test_extract_cross_file_links_skips_non_markdown() {
1440 use crate::config::MarkdownFlavor;
1441
1442 let content = r#"
1443[image](./photo.png)
1444[doc](./readme.md)
1445[pdf](./document.pdf)
1446"#;
1447 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1448 let links = extract_cross_file_links(&ctx);
1449
1450 assert_eq!(links.len(), 1);
1452 assert_eq!(links[0].target_path, "./readme.md");
1453 }
1454
1455 #[test]
1456 fn test_extract_cross_file_links_skips_code_spans() {
1457 use crate::config::MarkdownFlavor;
1458
1459 let content = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
1460 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1461 let links = extract_cross_file_links(&ctx);
1462
1463 assert_eq!(links.len(), 1);
1465 assert_eq!(links[0].target_path, "./file.md");
1466 }
1467
1468 #[test]
1469 fn test_extract_cross_file_links_with_query_params() {
1470 use crate::config::MarkdownFlavor;
1471
1472 let content = "See [doc](./file.md?raw=true) here.\n";
1473 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1474 let links = extract_cross_file_links(&ctx);
1475
1476 assert_eq!(links.len(), 1);
1477 assert_eq!(links[0].target_path, "./file.md");
1479 }
1480
1481 #[test]
1482 fn test_extract_cross_file_links_empty_content() {
1483 use crate::config::MarkdownFlavor;
1484
1485 let content = "";
1486 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1487 let links = extract_cross_file_links(&ctx);
1488
1489 assert!(links.is_empty());
1490 }
1491
1492 #[test]
1493 fn test_extract_cross_file_links_no_links() {
1494 use crate::config::MarkdownFlavor;
1495
1496 let content = "# Just a heading\n\nSome text without links.\n";
1497 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1498 let links = extract_cross_file_links(&ctx);
1499
1500 assert!(links.is_empty());
1501 }
1502
1503 #[test]
1504 fn test_extract_cross_file_links_position_accuracy_issue_234() {
1505 use crate::config::MarkdownFlavor;
1508
1509 let content = r#"# Test Document
1510
1511Here is a [broken link](nonexistent-file.md) that should trigger MD057.
1512
1513And another [link](also-missing.md) on this line.
1514"#;
1515 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1516 let links = extract_cross_file_links(&ctx);
1517
1518 assert_eq!(links.len(), 2);
1519
1520 assert_eq!(links[0].target_path, "nonexistent-file.md");
1522 assert_eq!(links[0].line, 3);
1523 assert_eq!(links[0].column, 25);
1524
1525 assert_eq!(links[1].target_path, "also-missing.md");
1527 assert_eq!(links[1].line, 5);
1528 assert_eq!(links[1].column, 20);
1529 }
1530}