1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
/// Converts one ASCII hexadecimal digit into its numeric value.
///
/// Accepts `0-9`, `a-f` and `A-F`; every other byte yields `None`.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // `to_digit(16)` only recognises ASCII hex digits and returns values in
    // `0..16`, so the conversion back to `u8` never fails in practice.
    char::from(c).to_digit(16).and_then(|digit| u8::try_from(digit).ok())
}
43
44fn url_decode(s: &str) -> String {
48 if !s.contains('%') {
50 return s.to_string();
51 }
52
53 let bytes = s.as_bytes();
54 let mut result = Vec::with_capacity(bytes.len());
55 let mut i = 0;
56
57 while i < bytes.len() {
58 if bytes[i] == b'%' && i + 2 < bytes.len() {
59 let hex1 = bytes[i + 1];
61 let hex2 = bytes[i + 2];
62 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
63 result.push(d1 * 16 + d2);
64 i += 3;
65 continue;
66 }
67 }
68 result.push(bytes[i]);
69 i += 1;
70 }
71
72 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
74}
75
/// Matches the bracketed text part of an inline link or image: an optional
/// `!`, then `[`, any run of characters other than `]`, then `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Matches `](<destination>)` — a link destination wrapped in angle brackets.
///
/// Group 1 is the text between `<` and `>`; group 2 is an optional `#...`
/// run directly after the `>`. An optional double-quoted title may follow.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches `](destination)` for a destination without angle brackets.
///
/// Group 1 is the destination up to (not including) the first `>`, `)`,
/// whitespace or `#`; group 2 is the optional `#fragment`. An optional
/// double-quoted title may follow.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches a string that starts with `scheme://`, with `scheme:`, or with
/// `www.`; used to recognise destinations that are not relative file paths.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
100
/// File-name suffixes (lowercase, including the leading dot) that identify a
/// path as a Markdown document.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
113
114#[inline]
116fn is_markdown_file(path: &str) -> bool {
117 let path_lower = path.to_lowercase();
118 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
119}
120
/// Returns the part of `url` before the first `?` or `#`, whichever comes
/// first. A `url` containing neither is returned unchanged.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|c: char| matches!(c, '?' | '#')) {
        Some(cut) => &url[..cut],
        None => url,
    }
}
134
135pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
143 let content = ctx.content;
144
145 if content.is_empty() || !content.contains("](") {
147 return Vec::new();
148 }
149
150 let mut links = Vec::new();
151 let lines: Vec<&str> = content.lines().collect();
152 let element_cache = ElementCache::new(content);
153 let line_index = &ctx.line_index;
154
155 let mut processed_lines = HashSet::new();
158
159 for link in &ctx.links {
160 let line_idx = link.line - 1;
161 if line_idx >= lines.len() {
162 continue;
163 }
164
165 if !processed_lines.insert(line_idx) {
167 continue;
168 }
169
170 let line = lines[line_idx];
171 if !line.contains("](") {
172 continue;
173 }
174
175 for link_match in LINK_START_REGEX.find_iter(line) {
177 let start_pos = link_match.start();
178 let end_pos = link_match.end();
179
180 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
182 let absolute_start_pos = line_start_byte + start_pos;
183
184 if element_cache.is_in_code_span(absolute_start_pos) {
186 continue;
187 }
188
189 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
192 .captures_at(line, end_pos - 1)
193 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
194
195 if let Some(caps) = caps_result
196 && let Some(url_group) = caps.get(1)
197 {
198 let file_path = url_group.as_str().trim();
199
200 if file_path.is_empty()
203 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
204 || file_path.starts_with("www.")
205 || file_path.starts_with('#')
206 || file_path.starts_with("{{")
207 || file_path.starts_with("{%")
208 || file_path.starts_with('/')
209 || file_path.starts_with('~')
210 || file_path.starts_with('@')
211 || (file_path.starts_with('`') && file_path.ends_with('`'))
212 {
213 continue;
214 }
215
216 let file_path = strip_query_and_fragment(file_path);
218
219 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
221
222 if is_markdown_file(file_path) {
224 links.push(CrossFileLinkIndex {
225 target_path: file_path.to_string(),
226 fragment: fragment.to_string(),
227 line: link.line,
228 column: url_group.start() + 1,
229 });
230 }
231 }
232 }
233 }
234
235 links
236}
237
/// Four-byte signature written at the start of the cache file; a file whose
/// first four bytes differ is discarded on load.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Cache format version, stored as a little-endian `u32` right after the
/// magic bytes. A cache file carrying any other version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;

/// File name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
249
/// Index of the files in a workspace plus the link dependencies between them.
///
/// The whole structure is (de)serialised with serde when it is written to or
/// read from the on-disk cache.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Maps a link target path to the set of files whose cross-file links
    /// resolve to it (maintained by `update_file`).
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Counter bumped (wrapping) whenever the index is modified.
    version: u64,
}
264
/// Everything the workspace index records about a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings of the file, in the order they were added.
    pub headings: Vec<HeadingIndex>,
    /// Reference-style link usages recorded for the file.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links from this file to other files.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference ids that are defined in this file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content at indexing time; compared against the
    /// current hash to decide whether the entry is stale.
    pub content_hash: String,
    /// Lowercased anchor -> position of the owning heading in `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors registered through `add_html_anchor`
    /// (presumably ids/names of HTML elements — confirm against callers).
    html_anchors: HashSet<String>,
    /// Lowercased anchors registered through `add_attribute_anchor`
    /// (presumably attribute-list ids on non-heading elements — confirm
    /// against callers).
    attribute_anchors: HashSet<String>,
    /// Rule names disabled for the whole file; the entry `"*"` disables
    /// every rule.
    pub file_disabled_rules: HashSet<String>,
    /// Line number -> rule names disabled on that line; the entry `"*"`
    /// disables every rule on the line.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
296
/// A heading recorded for a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Anchor generated automatically for the heading (may be empty).
    pub auto_anchor: String,
    /// Explicit anchor given to the heading, if any.
    pub custom_anchor: Option<String>,
    /// Line of the heading (presumably 1-based — confirm against the producer).
    pub line: usize,
}
309
/// A reference-style link usage recorded for a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Id of the reference the link points at.
    pub reference_id: String,
    /// Line of the link (presumably 1-based — confirm against the producer).
    pub line: usize,
    /// Column of the link (presumably 1-based — confirm against the producer).
    pub column: usize,
}
320
/// A link from one file to another file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Link destination as written in the source, without query string or
    /// fragment; resolved relative to the linking file's directory.
    pub target_path: String,
    /// Fragment of the link without the leading `#`; empty when absent.
    pub fragment: String,
    /// 1-based line of the link in the linking file.
    pub line: usize,
    /// 1-based column of the link destination; `extract_cross_file_links`
    /// fills it with the destination's byte offset in the line plus one.
    pub column: usize,
}
333
/// A heading that is reachable only through its auto-generated anchor, as
/// reported by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File containing the heading.
    pub file: PathBuf,
    /// Line of the heading, copied from `HeadingIndex::line`.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
344
345impl WorkspaceIndex {
346 pub fn new() -> Self {
348 Self::default()
349 }
350
351 pub fn version(&self) -> u64 {
353 self.version
354 }
355
356 pub fn file_count(&self) -> usize {
358 self.files.len()
359 }
360
361 pub fn contains_file(&self, path: &Path) -> bool {
363 self.files.contains_key(path)
364 }
365
366 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
368 self.files.get(path)
369 }
370
371 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
373 self.files.insert(path, index);
374 self.version = self.version.wrapping_add(1);
375 }
376
377 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
379 self.clear_reverse_deps_for(path);
381
382 let result = self.files.remove(path);
383 if result.is_some() {
384 self.version = self.version.wrapping_add(1);
385 }
386 result
387 }
388
389 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
399 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
400
401 for (file_path, file_index) in &self.files {
402 for heading in &file_index.headings {
403 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
405 let anchor_key = heading.auto_anchor.to_lowercase();
406 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
407 file: file_path.clone(),
408 line: heading.line,
409 text: heading.text.clone(),
410 });
411 }
412 }
413 }
414
415 vulnerable
416 }
417
418 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
420 self.files
421 .iter()
422 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
423 }
424
425 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
427 self.files.iter().map(|(p, i)| (p.as_path(), i))
428 }
429
430 pub fn clear(&mut self) {
432 self.files.clear();
433 self.reverse_deps.clear();
434 self.version = self.version.wrapping_add(1);
435 }
436
437 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
444 self.clear_reverse_deps_as_source(path);
447
448 for link in &index.cross_file_links {
450 let target = self.resolve_target_path(path, &link.target_path);
451 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
452 }
453
454 self.files.insert(path.to_path_buf(), index);
455 self.version = self.version.wrapping_add(1);
456 }
457
458 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
463 self.reverse_deps
464 .get(path)
465 .map(|set| set.iter().cloned().collect())
466 .unwrap_or_default()
467 }
468
469 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
473 self.files
474 .get(path)
475 .map(|f| f.content_hash != current_hash)
476 .unwrap_or(true)
477 }
478
479 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
484 let before_count = self.files.len();
485
486 let to_remove: Vec<PathBuf> = self
488 .files
489 .keys()
490 .filter(|path| !current_files.contains(*path))
491 .cloned()
492 .collect();
493
494 for path in &to_remove {
496 self.remove_file(path);
497 }
498
499 before_count - self.files.len()
500 }
501
502 #[cfg(feature = "native")]
509 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
510 use std::fs;
511 use std::io::Write;
512
513 fs::create_dir_all(cache_dir)?;
515
516 let encoded = postcard::to_allocvec(self)
518 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
519
520 let mut cache_data = Vec::with_capacity(8 + encoded.len());
522 cache_data.extend_from_slice(CACHE_MAGIC);
523 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
524 cache_data.extend_from_slice(&encoded);
525
526 let final_path = cache_dir.join(CACHE_FILE_NAME);
528 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
529
530 {
532 let mut file = fs::File::create(&temp_path)?;
533 file.write_all(&cache_data)?;
534 file.sync_all()?;
535 }
536
537 fs::rename(&temp_path, &final_path)?;
539
540 log::debug!(
541 "Saved workspace index to cache: {} files, {} bytes (format v{})",
542 self.files.len(),
543 cache_data.len(),
544 CACHE_FORMAT_VERSION
545 );
546
547 Ok(())
548 }
549
550 #[cfg(feature = "native")]
558 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
559 use std::fs;
560
561 let path = cache_dir.join(CACHE_FILE_NAME);
562 let data = fs::read(&path).ok()?;
563
564 if data.len() < 8 {
566 log::warn!("Workspace index cache too small, discarding");
567 let _ = fs::remove_file(&path);
568 return None;
569 }
570
571 if &data[0..4] != CACHE_MAGIC {
573 log::warn!("Workspace index cache has invalid magic header, discarding");
574 let _ = fs::remove_file(&path);
575 return None;
576 }
577
578 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
580 if version != CACHE_FORMAT_VERSION {
581 log::info!(
582 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
583 );
584 let _ = fs::remove_file(&path);
585 return None;
586 }
587
588 match postcard::from_bytes::<Self>(&data[8..]) {
590 Ok(index) => {
591 log::debug!(
592 "Loaded workspace index from cache: {} files (format v{})",
593 index.files.len(),
594 version
595 );
596 Some(index)
597 }
598 Err(e) => {
599 log::warn!("Failed to deserialize workspace index cache: {e}");
600 let _ = fs::remove_file(&path);
601 None
602 }
603 }
604 }
605
606 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
611 for deps in self.reverse_deps.values_mut() {
612 deps.remove(path);
613 }
614 self.reverse_deps.retain(|_, deps| !deps.is_empty());
616 }
617
618 fn clear_reverse_deps_for(&mut self, path: &Path) {
623 self.clear_reverse_deps_as_source(path);
625
626 self.reverse_deps.remove(path);
628 }
629
630 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
632 let source_dir = source_file.parent().unwrap_or(Path::new(""));
634
635 let target = source_dir.join(relative_target);
637
638 Self::normalize_path(&target)
640 }
641
642 fn normalize_path(path: &Path) -> PathBuf {
644 let mut components = Vec::new();
645
646 for component in path.components() {
647 match component {
648 std::path::Component::ParentDir => {
649 if !components.is_empty() {
651 components.pop();
652 }
653 }
654 std::path::Component::CurDir => {
655 }
657 _ => {
658 components.push(component);
659 }
660 }
661 }
662
663 components.iter().collect()
664 }
665}
666
667impl FileIndex {
668 pub fn new() -> Self {
670 Self::default()
671 }
672
673 pub fn with_hash(content_hash: String) -> Self {
675 Self {
676 content_hash,
677 ..Default::default()
678 }
679 }
680
681 pub fn add_heading(&mut self, heading: HeadingIndex) {
685 let index = self.headings.len();
686
687 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
689
690 if let Some(ref custom) = heading.custom_anchor {
692 self.anchor_to_heading.insert(custom.to_lowercase(), index);
693 }
694
695 self.headings.push(heading);
696 }
697
698 pub fn add_anchor_alias(&mut self, anchor: String, heading_index: usize) {
701 if heading_index < self.headings.len() {
702 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
703 }
704 }
705
706 pub fn has_anchor(&self, anchor: &str) -> bool {
717 let lower = anchor.to_lowercase();
718
719 if self.anchor_to_heading.contains_key(&lower)
721 || self.html_anchors.contains(&lower)
722 || self.attribute_anchors.contains(&lower)
723 {
724 return true;
725 }
726
727 if anchor.contains('%') {
729 let decoded = url_decode(anchor).to_lowercase();
730 if decoded != lower {
731 return self.anchor_to_heading.contains_key(&decoded)
732 || self.html_anchors.contains(&decoded)
733 || self.attribute_anchors.contains(&decoded);
734 }
735 }
736
737 false
738 }
739
740 pub fn add_html_anchor(&mut self, anchor: String) {
742 if !anchor.is_empty() {
743 self.html_anchors.insert(anchor.to_lowercase());
744 }
745 }
746
747 pub fn add_attribute_anchor(&mut self, anchor: String) {
749 if !anchor.is_empty() {
750 self.attribute_anchors.insert(anchor.to_lowercase());
751 }
752 }
753
754 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
758 self.anchor_to_heading
759 .get(&anchor.to_lowercase())
760 .and_then(|&idx| self.headings.get(idx))
761 }
762
763 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
765 self.reference_links.push(link);
766 }
767
768 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
773 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
775 return true;
776 }
777
778 if let Some(rules) = self.line_disabled_rules.get(&line) {
780 return rules.contains("*") || rules.contains(rule_name);
781 }
782
783 false
784 }
785
786 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
788 let is_duplicate = self.cross_file_links.iter().any(|existing| {
791 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
792 });
793 if !is_duplicate {
794 self.cross_file_links.push(link);
795 }
796 }
797
798 pub fn add_defined_reference(&mut self, ref_id: String) {
800 self.defined_references.insert(ref_id);
801 }
802
803 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
805 self.defined_references.contains(ref_id)
806 }
807
808 pub fn hash_matches(&self, hash: &str) -> bool {
810 self.content_hash == hash
811 }
812
813 pub fn heading_count(&self) -> usize {
815 self.headings.len()
816 }
817
818 pub fn reference_link_count(&self) -> usize {
820 self.reference_links.len()
821 }
822}
823
824#[cfg(test)]
825mod tests {
826 use super::*;
827
828 #[test]
829 fn test_workspace_index_basic() {
830 let mut index = WorkspaceIndex::new();
831 assert_eq!(index.file_count(), 0);
832 assert_eq!(index.version(), 0);
833
834 let mut file_index = FileIndex::with_hash("abc123".to_string());
835 file_index.add_heading(HeadingIndex {
836 text: "Installation".to_string(),
837 auto_anchor: "installation".to_string(),
838 custom_anchor: None,
839 line: 1,
840 });
841
842 index.insert_file(PathBuf::from("docs/install.md"), file_index);
843 assert_eq!(index.file_count(), 1);
844 assert_eq!(index.version(), 1);
845
846 assert!(index.contains_file(Path::new("docs/install.md")));
847 assert!(!index.contains_file(Path::new("docs/other.md")));
848 }
849
850 #[test]
851 fn test_vulnerable_anchors() {
852 let mut index = WorkspaceIndex::new();
853
854 let mut file1 = FileIndex::new();
856 file1.add_heading(HeadingIndex {
857 text: "Getting Started".to_string(),
858 auto_anchor: "getting-started".to_string(),
859 custom_anchor: None,
860 line: 1,
861 });
862 index.insert_file(PathBuf::from("docs/guide.md"), file1);
863
864 let mut file2 = FileIndex::new();
866 file2.add_heading(HeadingIndex {
867 text: "Installation".to_string(),
868 auto_anchor: "installation".to_string(),
869 custom_anchor: Some("install".to_string()),
870 line: 1,
871 });
872 index.insert_file(PathBuf::from("docs/install.md"), file2);
873
874 let vulnerable = index.get_vulnerable_anchors();
875 assert_eq!(vulnerable.len(), 1);
876 assert!(vulnerable.contains_key("getting-started"));
877 assert!(!vulnerable.contains_key("installation"));
878
879 let anchors = vulnerable.get("getting-started").unwrap();
880 assert_eq!(anchors.len(), 1);
881 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
882 assert_eq!(anchors[0].text, "Getting Started");
883 }
884
885 #[test]
886 fn test_vulnerable_anchors_multiple_files_same_anchor() {
887 let mut index = WorkspaceIndex::new();
890
891 let mut file1 = FileIndex::new();
893 file1.add_heading(HeadingIndex {
894 text: "Installation".to_string(),
895 auto_anchor: "installation".to_string(),
896 custom_anchor: None,
897 line: 1,
898 });
899 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
900
901 let mut file2 = FileIndex::new();
903 file2.add_heading(HeadingIndex {
904 text: "Installation".to_string(),
905 auto_anchor: "installation".to_string(),
906 custom_anchor: None,
907 line: 5,
908 });
909 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
910
911 let mut file3 = FileIndex::new();
913 file3.add_heading(HeadingIndex {
914 text: "Installation".to_string(),
915 auto_anchor: "installation".to_string(),
916 custom_anchor: Some("install".to_string()),
917 line: 10,
918 });
919 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
920
921 let vulnerable = index.get_vulnerable_anchors();
922 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
924
925 let anchors = vulnerable.get("installation").unwrap();
926 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
928
929 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
931 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
932 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
933 }
934
935 #[test]
936 fn test_file_index_hash() {
937 let index = FileIndex::with_hash("hash123".to_string());
938 assert!(index.hash_matches("hash123"));
939 assert!(!index.hash_matches("other"));
940 }
941
942 #[test]
943 fn test_version_increment() {
944 let mut index = WorkspaceIndex::new();
945 assert_eq!(index.version(), 0);
946
947 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
948 assert_eq!(index.version(), 1);
949
950 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
951 assert_eq!(index.version(), 2);
952
953 index.remove_file(Path::new("a.md"));
954 assert_eq!(index.version(), 3);
955
956 index.remove_file(Path::new("nonexistent.md"));
958 assert_eq!(index.version(), 3);
959 }
960
961 #[test]
962 fn test_reverse_deps_basic() {
963 let mut index = WorkspaceIndex::new();
964
965 let mut file_a = FileIndex::new();
967 file_a.add_cross_file_link(CrossFileLinkIndex {
968 target_path: "b.md".to_string(),
969 fragment: "section".to_string(),
970 line: 10,
971 column: 5,
972 });
973 index.update_file(Path::new("docs/a.md"), file_a);
974
975 let dependents = index.get_dependents(Path::new("docs/b.md"));
977 assert_eq!(dependents.len(), 1);
978 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
979
980 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
982 assert!(a_dependents.is_empty());
983 }
984
985 #[test]
986 fn test_reverse_deps_multiple() {
987 let mut index = WorkspaceIndex::new();
988
989 let mut file_a = FileIndex::new();
991 file_a.add_cross_file_link(CrossFileLinkIndex {
992 target_path: "../b.md".to_string(),
993 fragment: "".to_string(),
994 line: 1,
995 column: 1,
996 });
997 index.update_file(Path::new("docs/sub/a.md"), file_a);
998
999 let mut file_c = FileIndex::new();
1000 file_c.add_cross_file_link(CrossFileLinkIndex {
1001 target_path: "b.md".to_string(),
1002 fragment: "".to_string(),
1003 line: 1,
1004 column: 1,
1005 });
1006 index.update_file(Path::new("docs/c.md"), file_c);
1007
1008 let dependents = index.get_dependents(Path::new("docs/b.md"));
1010 assert_eq!(dependents.len(), 2);
1011 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1012 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1013 }
1014
1015 #[test]
1016 fn test_reverse_deps_update_clears_old() {
1017 let mut index = WorkspaceIndex::new();
1018
1019 let mut file_a = FileIndex::new();
1021 file_a.add_cross_file_link(CrossFileLinkIndex {
1022 target_path: "b.md".to_string(),
1023 fragment: "".to_string(),
1024 line: 1,
1025 column: 1,
1026 });
1027 index.update_file(Path::new("docs/a.md"), file_a);
1028
1029 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1031
1032 let mut file_a_updated = FileIndex::new();
1034 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1035 target_path: "c.md".to_string(),
1036 fragment: "".to_string(),
1037 line: 1,
1038 column: 1,
1039 });
1040 index.update_file(Path::new("docs/a.md"), file_a_updated);
1041
1042 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1044
1045 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1047 assert_eq!(c_deps.len(), 1);
1048 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1049 }
1050
1051 #[test]
1052 fn test_reverse_deps_remove_file() {
1053 let mut index = WorkspaceIndex::new();
1054
1055 let mut file_a = FileIndex::new();
1057 file_a.add_cross_file_link(CrossFileLinkIndex {
1058 target_path: "b.md".to_string(),
1059 fragment: "".to_string(),
1060 line: 1,
1061 column: 1,
1062 });
1063 index.update_file(Path::new("docs/a.md"), file_a);
1064
1065 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1067
1068 index.remove_file(Path::new("docs/a.md"));
1070
1071 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1073 }
1074
1075 #[test]
1076 fn test_normalize_path() {
1077 let path = Path::new("docs/sub/../other.md");
1079 let normalized = WorkspaceIndex::normalize_path(path);
1080 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1081
1082 let path2 = Path::new("docs/./other.md");
1084 let normalized2 = WorkspaceIndex::normalize_path(path2);
1085 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1086
1087 let path3 = Path::new("a/b/c/../../d.md");
1089 let normalized3 = WorkspaceIndex::normalize_path(path3);
1090 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1091 }
1092
1093 #[test]
1094 fn test_clear_clears_reverse_deps() {
1095 let mut index = WorkspaceIndex::new();
1096
1097 let mut file_a = FileIndex::new();
1099 file_a.add_cross_file_link(CrossFileLinkIndex {
1100 target_path: "b.md".to_string(),
1101 fragment: "".to_string(),
1102 line: 1,
1103 column: 1,
1104 });
1105 index.update_file(Path::new("docs/a.md"), file_a);
1106
1107 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1109
1110 index.clear();
1112
1113 assert_eq!(index.file_count(), 0);
1115 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1116 }
1117
1118 #[test]
1119 fn test_is_file_stale() {
1120 let mut index = WorkspaceIndex::new();
1121
1122 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1124
1125 let file_index = FileIndex::with_hash("hash123".to_string());
1127 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1128
1129 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1131
1132 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1134 }
1135
1136 #[cfg(feature = "native")]
1137 #[test]
1138 fn test_cache_roundtrip() {
1139 use std::fs;
1140
1141 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1143 let _ = fs::remove_dir_all(&temp_dir);
1144 fs::create_dir_all(&temp_dir).unwrap();
1145
1146 let mut index = WorkspaceIndex::new();
1148
1149 let mut file1 = FileIndex::with_hash("abc123".to_string());
1150 file1.add_heading(HeadingIndex {
1151 text: "Test Heading".to_string(),
1152 auto_anchor: "test-heading".to_string(),
1153 custom_anchor: Some("test".to_string()),
1154 line: 1,
1155 });
1156 file1.add_cross_file_link(CrossFileLinkIndex {
1157 target_path: "./other.md".to_string(),
1158 fragment: "section".to_string(),
1159 line: 5,
1160 column: 3,
1161 });
1162 index.update_file(Path::new("docs/file1.md"), file1);
1163
1164 let mut file2 = FileIndex::with_hash("def456".to_string());
1165 file2.add_heading(HeadingIndex {
1166 text: "Another Heading".to_string(),
1167 auto_anchor: "another-heading".to_string(),
1168 custom_anchor: None,
1169 line: 1,
1170 });
1171 index.update_file(Path::new("docs/other.md"), file2);
1172
1173 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1175
1176 assert!(temp_dir.join("workspace_index.bin").exists());
1178
1179 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1181
1182 assert_eq!(loaded.file_count(), 2);
1184 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1185 assert!(loaded.contains_file(Path::new("docs/other.md")));
1186
1187 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1189 assert_eq!(file1_loaded.content_hash, "abc123");
1190 assert_eq!(file1_loaded.headings.len(), 1);
1191 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1192 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1193 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1194 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1195
1196 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1198 assert_eq!(dependents.len(), 1);
1199 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1200
1201 let _ = fs::remove_dir_all(&temp_dir);
1203 }
1204
1205 #[cfg(feature = "native")]
1206 #[test]
1207 fn test_cache_missing_file() {
1208 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1209 let _ = std::fs::remove_dir_all(&temp_dir);
1210
1211 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1213 assert!(result.is_none());
1214 }
1215
1216 #[cfg(feature = "native")]
1217 #[test]
1218 fn test_cache_corrupted_file() {
1219 use std::fs;
1220
1221 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1222 let _ = fs::remove_dir_all(&temp_dir);
1223 fs::create_dir_all(&temp_dir).unwrap();
1224
1225 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1227
1228 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1230 assert!(result.is_none());
1231
1232 assert!(!temp_dir.join("workspace_index.bin").exists());
1234
1235 let _ = fs::remove_dir_all(&temp_dir);
1237 }
1238
1239 #[cfg(feature = "native")]
1240 #[test]
1241 fn test_cache_invalid_magic() {
1242 use std::fs;
1243
1244 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1245 let _ = fs::remove_dir_all(&temp_dir);
1246 fs::create_dir_all(&temp_dir).unwrap();
1247
1248 let mut data = Vec::new();
1250 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1254
1255 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1257 assert!(result.is_none());
1258
1259 assert!(!temp_dir.join("workspace_index.bin").exists());
1261
1262 let _ = fs::remove_dir_all(&temp_dir);
1264 }
1265
1266 #[cfg(feature = "native")]
1267 #[test]
1268 fn test_cache_version_mismatch() {
1269 use std::fs;
1270
1271 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1272 let _ = fs::remove_dir_all(&temp_dir);
1273 fs::create_dir_all(&temp_dir).unwrap();
1274
1275 let mut data = Vec::new();
1277 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1281
1282 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1284 assert!(result.is_none());
1285
1286 assert!(!temp_dir.join("workspace_index.bin").exists());
1288
1289 let _ = fs::remove_dir_all(&temp_dir);
1291 }
1292
1293 #[cfg(feature = "native")]
1294 #[test]
1295 fn test_cache_atomic_write() {
1296 use std::fs;
1297
1298 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1300 let _ = fs::remove_dir_all(&temp_dir);
1301 fs::create_dir_all(&temp_dir).unwrap();
1302
1303 let index = WorkspaceIndex::new();
1304 index.save_to_cache(&temp_dir).expect("Failed to save");
1305
1306 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1308 assert_eq!(entries.len(), 1);
1309 assert!(temp_dir.join("workspace_index.bin").exists());
1310
1311 let _ = fs::remove_dir_all(&temp_dir);
1313 }
1314
1315 #[test]
1316 fn test_has_anchor_auto_generated() {
1317 let mut file_index = FileIndex::new();
1318 file_index.add_heading(HeadingIndex {
1319 text: "Installation Guide".to_string(),
1320 auto_anchor: "installation-guide".to_string(),
1321 custom_anchor: None,
1322 line: 1,
1323 });
1324
1325 assert!(file_index.has_anchor("installation-guide"));
1327
1328 assert!(file_index.has_anchor("Installation-Guide"));
1330 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1331
1332 assert!(!file_index.has_anchor("nonexistent"));
1334 }
1335
1336 #[test]
1337 fn test_has_anchor_custom() {
1338 let mut file_index = FileIndex::new();
1339 file_index.add_heading(HeadingIndex {
1340 text: "Installation Guide".to_string(),
1341 auto_anchor: "installation-guide".to_string(),
1342 custom_anchor: Some("install".to_string()),
1343 line: 1,
1344 });
1345
1346 assert!(file_index.has_anchor("installation-guide"));
1348
1349 assert!(file_index.has_anchor("install"));
1351 assert!(file_index.has_anchor("Install")); assert!(!file_index.has_anchor("nonexistent"));
1355 }
1356
1357 #[test]
1358 fn test_get_heading_by_anchor() {
1359 let mut file_index = FileIndex::new();
1360 file_index.add_heading(HeadingIndex {
1361 text: "Installation Guide".to_string(),
1362 auto_anchor: "installation-guide".to_string(),
1363 custom_anchor: Some("install".to_string()),
1364 line: 10,
1365 });
1366 file_index.add_heading(HeadingIndex {
1367 text: "Configuration".to_string(),
1368 auto_anchor: "configuration".to_string(),
1369 custom_anchor: None,
1370 line: 20,
1371 });
1372
1373 let heading = file_index.get_heading_by_anchor("installation-guide");
1375 assert!(heading.is_some());
1376 assert_eq!(heading.unwrap().text, "Installation Guide");
1377 assert_eq!(heading.unwrap().line, 10);
1378
1379 let heading = file_index.get_heading_by_anchor("install");
1381 assert!(heading.is_some());
1382 assert_eq!(heading.unwrap().text, "Installation Guide");
1383
1384 let heading = file_index.get_heading_by_anchor("configuration");
1386 assert!(heading.is_some());
1387 assert_eq!(heading.unwrap().text, "Configuration");
1388 assert_eq!(heading.unwrap().line, 20);
1389
1390 assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
1392 }
1393
/// Registers 100 headings, each with an auto-generated and a custom anchor,
/// and checks that every one of them can be found again.
#[test]
fn test_anchor_lookup_many_headings() {
    let mut index = FileIndex::new();

    // Populate the index; heading `i` lives on line `i + 1`.
    for i in 0..100 {
        let entry = HeadingIndex {
            text: format!("Heading {i}"),
            auto_anchor: format!("heading-{i}"),
            custom_anchor: Some(format!("h{i}")),
            line: i + 1,
        };
        index.add_heading(entry);
    }

    // Every anchor of every heading must resolve, and the auto anchor must
    // lead back to the heading stored for that line.
    for i in 0..100 {
        let auto_anchor = format!("heading-{i}");
        let custom_anchor = format!("h{i}");

        assert!(index.has_anchor(&auto_anchor));
        assert!(index.has_anchor(&custom_anchor));

        let found = index.get_heading_by_anchor(&auto_anchor);
        assert!(found.is_some());
        assert_eq!(found.unwrap().line, i + 1);
    }
}
1419
/// A single relative Markdown link is expected to be reported with its path,
/// an empty fragment, and the position of the link target.
#[test]
fn test_extract_cross_file_links_basic() {
    let source = "# Test\n\nSee [link](./other.md) for info.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let link = &extracted[0];
    assert_eq!(link.target_path, "./other.md");
    assert_eq!(link.fragment, "");
    // The link sits on the third line; column 12 is the 1-based position of
    // the first character of `./other.md`.
    assert_eq!(link.line, 3);
    assert_eq!(link.column, 12);
}
1439
/// A link of the form `path#fragment` is expected to be split into the target
/// path and the fragment without the leading `#`.
#[test]
fn test_extract_cross_file_links_with_fragment() {
    let source = "Check [guide](./guide.md#install) here.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let link = &extracted[0];
    assert_eq!(link.target_path, "./guide.md");
    assert_eq!(link.fragment, "install");
    // Column 15 is the 1-based position of the first character of the target.
    assert_eq!(link.line, 1);
    assert_eq!(link.column, 15);
}
1455
/// Two links on the same line are expected to be reported in source order,
/// each with its own column.
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
    let source = "See [a](a.md) and [b](b.md) here.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 2);

    // First link: target starts at column 9.
    let first = &extracted[0];
    assert_eq!(first.target_path, "a.md");
    assert_eq!(first.line, 1);
    assert_eq!(first.column, 9);

    // Second link: target starts at column 23.
    let second = &extracted[1];
    assert_eq!(second.target_path, "b.md");
    assert_eq!(second.line, 1);
    assert_eq!(second.column, 23);
}
1476
/// An angle-bracketed target may contain spaces and parentheses; the extracted
/// path is expected to be the text between `<` and `>`.
#[test]
fn test_extract_cross_file_links_angle_brackets() {
    let source = "See [link](<path/with (parens).md>) here.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let link = &extracted[0];
    assert_eq!(link.target_path, "path/with (parens).md");
    assert_eq!(link.line, 1);
    // Column 13 is the 1-based position of the `p` right after the `<`.
    assert_eq!(link.column, 13);
}
1491
/// Of the five links below, only the relative `./local.md` one is expected to
/// be extracted; the URL, `mailto:`, fragment-only and absolute-path links are
/// expected to be left out.
#[test]
fn test_extract_cross_file_links_skips_external() {
    let source = r#"
[external](https://example.com)
[mailto](mailto:test@example.com)
[local](./local.md)
[fragment](#section)
[absolute](/docs/page.md)
"#;
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let only_link = &extracted[0];
    assert_eq!(only_link.target_path, "./local.md");
}
1510
/// Links to non-Markdown files (`.png`, `.pdf`) are expected to be ignored,
/// leaving only the `.md` link in the result.
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
    let source = r#"
[image](./photo.png)
[doc](./readme.md)
[pdf](./document.pdf)
"#;
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let only_link = &extracted[0];
    assert_eq!(only_link.target_path, "./readme.md");
}
1527
/// Link syntax inside an inline code span is expected to be ignored, while the
/// regular link on the same line is still extracted.
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
    let source = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let only_link = &extracted[0];
    assert_eq!(only_link.target_path, "./file.md");
}
1540
/// A query string after the file name (`?raw=true`) is expected to be absent
/// from the extracted target path.
#[test]
fn test_extract_cross_file_links_with_query_params() {
    let source = "See [doc](./file.md?raw=true) here.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 1);

    let only_link = &extracted[0];
    assert_eq!(only_link.target_path, "./file.md");
}
1553
/// An empty document is expected to yield no cross-file links.
#[test]
fn test_extract_cross_file_links_empty_content() {
    let ctx = LintContext::new("", crate::config::MarkdownFlavor::default(), None);

    let extracted = extract_cross_file_links(&ctx);

    assert!(extracted.is_empty());
}
1564
/// A document containing only a heading and plain text is expected to yield
/// no cross-file links.
#[test]
fn test_extract_cross_file_links_no_links() {
    let source = "# Just a heading\n\nSome text without links.\n";
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);

    let extracted = extract_cross_file_links(&ctx);

    assert!(extracted.is_empty());
}
1575
/// Position check for two links on different lines of a multi-line document:
/// each reported line/column must point at the first character of the link
/// target.
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
    let source = r#"# Test Document

Here is a [broken link](nonexistent-file.md) that should trigger MD057.

And another [link](also-missing.md) on this line.
"#;
    let ctx = LintContext::new(source, crate::config::MarkdownFlavor::default(), None);
    let extracted = extract_cross_file_links(&ctx);

    assert_eq!(extracted.len(), 2);

    // Link on line 3: the target begins at column 25.
    let first = &extracted[0];
    assert_eq!(first.target_path, "nonexistent-file.md");
    assert_eq!(first.line, 3);
    assert_eq!(first.column, 25);

    // Link on line 5: the target begins at column 20.
    let second = &extracted[1];
    assert_eq!(second.target_path, "also-missing.md");
    assert_eq!(second.line, 5);
    assert_eq!(second.column, 20);
}
1603}