1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) to its numeric
/// value in `0..=15`.
///
/// Returns `None` for any byte that is not a hexadecimal digit.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // Each digit class maps to its value by subtracting a fixed offset.
    let offset = match c {
        b'0'..=b'9' => b'0',
        b'a'..=b'f' => b'a' - 10,
        b'A'..=b'F' => b'A' - 10,
        _ => return None,
    };
    Some(c - offset)
}
43
44fn url_decode(s: &str) -> String {
48 if !s.contains('%') {
50 return s.to_string();
51 }
52
53 let bytes = s.as_bytes();
54 let mut result = Vec::with_capacity(bytes.len());
55 let mut i = 0;
56
57 while i < bytes.len() {
58 if bytes[i] == b'%' && i + 2 < bytes.len() {
59 let hex1 = bytes[i + 1];
61 let hex2 = bytes[i + 2];
62 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
63 result.push(d1 * 16 + d2);
64 i += 3;
65 continue;
66 }
67 }
68 result.push(bytes[i]);
69 i += 1;
70 }
71
72 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
74}
75
/// Matches the bracketed text part of an inline link or image: an optional
/// `!` followed by `[...]` with no `]` inside the brackets.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
86
/// Matches the `](<url>)` tail of an inline link whose destination is wrapped
/// in angle brackets.
///
/// Group 1 is the text between `<` and `>`; group 2 is an optional `#fragment`
/// directly after the closing `>`. An optional double-quoted title may follow.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
91
/// Matches the `](url)` tail of an inline link with a bare destination.
///
/// Group 1 is the destination up to the first `>`, `)`, whitespace or `#`;
/// group 2 is an optional `#fragment`. An optional double-quoted title may
/// follow.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
96
/// Matches destinations that start with `scheme://`, `scheme:` or `www.`.
/// `extract_cross_file_links` skips any destination this matches.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
100
/// File-name suffixes (lowercase, including the leading dot) that
/// `is_markdown_file` treats as Markdown.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
113
114#[inline]
116fn is_markdown_file(path: &str) -> bool {
117 let path_lower = path.to_lowercase();
118 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
119}
120
/// Returns the part of `url` before the first `?` or `#`, whichever comes
/// first, or the whole string when neither is present.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|c: char| c == '?' || c == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
134
135pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
143 let content = ctx.content;
144
145 if content.is_empty() || !content.contains("](") {
147 return Vec::new();
148 }
149
150 let mut links = Vec::new();
151 let lines: Vec<&str> = content.lines().collect();
152 let element_cache = ElementCache::new(content);
153 let line_index = &ctx.line_index;
154
155 let mut processed_lines = HashSet::new();
158
159 for link in &ctx.links {
160 let line_idx = link.line - 1;
161 if line_idx >= lines.len() {
162 continue;
163 }
164
165 if !processed_lines.insert(line_idx) {
167 continue;
168 }
169
170 let line = lines[line_idx];
171 if !line.contains("](") {
172 continue;
173 }
174
175 for link_match in LINK_START_REGEX.find_iter(line) {
177 let start_pos = link_match.start();
178 let end_pos = link_match.end();
179
180 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
182 let absolute_start_pos = line_start_byte + start_pos;
183
184 if element_cache.is_in_code_span(absolute_start_pos) {
186 continue;
187 }
188
189 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
192 .captures_at(line, end_pos - 1)
193 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
194
195 if let Some(caps) = caps_result
196 && let Some(url_group) = caps.get(1)
197 {
198 let file_path = url_group.as_str().trim();
199
200 if file_path.is_empty()
203 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
204 || file_path.starts_with("www.")
205 || file_path.starts_with('#')
206 || file_path.starts_with("{{")
207 || file_path.starts_with("{%")
208 || file_path.starts_with('/')
209 || file_path.starts_with('~')
210 || file_path.starts_with('@')
211 || (file_path.starts_with('`') && file_path.ends_with('`'))
212 {
213 continue;
214 }
215
216 let file_path = strip_query_and_fragment(file_path);
218
219 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
221
222 if is_markdown_file(file_path) {
224 links.push(CrossFileLinkIndex {
225 target_path: file_path.to_string(),
226 fragment: fragment.to_string(),
227 line: link.line,
228 column: url_group.start() + 1,
229 });
230 }
231 }
232 }
233 }
234
235 links
236}
237
/// Four-byte marker written at the start of the cache file;
/// `load_from_cache` discards files that do not begin with it.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Format version stored (little-endian) right after the magic bytes;
/// `load_from_cache` discards caches written with a different value.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;

/// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
249
/// Index of all known files in a workspace plus the links between them.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Maps a link target to the set of files that link to it
    /// (populated by `update_file`).
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Counter bumped (wrapping) whenever the index is modified.
    version: u64,
}
264
/// Index data collected for a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings in the order they were added via `add_heading`.
    pub headings: Vec<HeadingIndex>,
    /// Reference-style link usages recorded for this file.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links from this file to other files.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference identifiers defined in this file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content, compared by `hash_matches` / `is_file_stale`.
    pub content_hash: String,
    /// Lowercased anchor (auto or custom) -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors added through `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors added through `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` stands for every rule.
    pub file_disabled_rules: HashSet<String>,
    /// Rules disabled on individual lines, keyed by line number;
    /// `"*"` stands for every rule.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
296
/// A heading recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Anchor generated automatically for the heading.
    pub auto_anchor: String,
    /// Explicit anchor for the heading, if one was given.
    pub custom_anchor: Option<String>,
    /// Line number of the heading.
    pub line: usize,
}
309
/// A reference-style link usage recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier of the reference being used.
    pub reference_id: String,
    /// Line number of the usage.
    pub line: usize,
    /// Column of the usage.
    pub column: usize,
}
320
/// A link from one file to another, as produced by `extract_cross_file_links`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Link destination as written, with any query string and fragment removed.
    pub target_path: String,
    /// Fragment without the leading `#`; empty when the link has none.
    pub fragment: String,
    /// 1-based line number of the link.
    pub line: usize,
    /// 1-based byte offset of the destination within its line.
    pub column: usize,
}
333
/// A heading reported by `WorkspaceIndex::get_vulnerable_anchors`: it has an
/// auto-generated anchor but no custom anchor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File containing the heading.
    pub file: PathBuf,
    /// Line number of the heading.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
344
345impl WorkspaceIndex {
346 pub fn new() -> Self {
348 Self::default()
349 }
350
351 pub fn version(&self) -> u64 {
353 self.version
354 }
355
356 pub fn file_count(&self) -> usize {
358 self.files.len()
359 }
360
361 pub fn contains_file(&self, path: &Path) -> bool {
363 self.files.contains_key(path)
364 }
365
366 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
368 self.files.get(path)
369 }
370
371 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
373 self.files.insert(path, index);
374 self.version = self.version.wrapping_add(1);
375 }
376
377 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
379 self.clear_reverse_deps_for(path);
381
382 let result = self.files.remove(path);
383 if result.is_some() {
384 self.version = self.version.wrapping_add(1);
385 }
386 result
387 }
388
389 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
399 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
400
401 for (file_path, file_index) in &self.files {
402 for heading in &file_index.headings {
403 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
405 let anchor_key = heading.auto_anchor.to_lowercase();
406 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
407 file: file_path.clone(),
408 line: heading.line,
409 text: heading.text.clone(),
410 });
411 }
412 }
413 }
414
415 vulnerable
416 }
417
418 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
420 self.files
421 .iter()
422 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
423 }
424
425 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
427 self.files.iter().map(|(p, i)| (p.as_path(), i))
428 }
429
430 pub fn clear(&mut self) {
432 self.files.clear();
433 self.reverse_deps.clear();
434 self.version = self.version.wrapping_add(1);
435 }
436
437 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
444 self.clear_reverse_deps_as_source(path);
447
448 for link in &index.cross_file_links {
450 let target = self.resolve_target_path(path, &link.target_path);
451 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
452 }
453
454 self.files.insert(path.to_path_buf(), index);
455 self.version = self.version.wrapping_add(1);
456 }
457
458 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
463 self.reverse_deps
464 .get(path)
465 .map(|set| set.iter().cloned().collect())
466 .unwrap_or_default()
467 }
468
469 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
473 self.files
474 .get(path)
475 .map(|f| f.content_hash != current_hash)
476 .unwrap_or(true)
477 }
478
479 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
484 let before_count = self.files.len();
485
486 let to_remove: Vec<PathBuf> = self
488 .files
489 .keys()
490 .filter(|path| !current_files.contains(*path))
491 .cloned()
492 .collect();
493
494 for path in &to_remove {
496 self.remove_file(path);
497 }
498
499 before_count - self.files.len()
500 }
501
502 #[cfg(feature = "native")]
509 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
510 use std::fs;
511 use std::io::Write;
512
513 fs::create_dir_all(cache_dir)?;
515
516 let encoded = postcard::to_allocvec(self)
518 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
519
520 let mut cache_data = Vec::with_capacity(8 + encoded.len());
522 cache_data.extend_from_slice(CACHE_MAGIC);
523 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
524 cache_data.extend_from_slice(&encoded);
525
526 let final_path = cache_dir.join(CACHE_FILE_NAME);
528 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
529
530 {
532 let mut file = fs::File::create(&temp_path)?;
533 file.write_all(&cache_data)?;
534 file.sync_all()?;
535 }
536
537 fs::rename(&temp_path, &final_path)?;
539
540 log::debug!(
541 "Saved workspace index to cache: {} files, {} bytes (format v{})",
542 self.files.len(),
543 cache_data.len(),
544 CACHE_FORMAT_VERSION
545 );
546
547 Ok(())
548 }
549
550 #[cfg(feature = "native")]
558 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
559 use std::fs;
560
561 let path = cache_dir.join(CACHE_FILE_NAME);
562 let data = fs::read(&path).ok()?;
563
564 if data.len() < 8 {
566 log::warn!("Workspace index cache too small, discarding");
567 let _ = fs::remove_file(&path);
568 return None;
569 }
570
571 if &data[0..4] != CACHE_MAGIC {
573 log::warn!("Workspace index cache has invalid magic header, discarding");
574 let _ = fs::remove_file(&path);
575 return None;
576 }
577
578 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
580 if version != CACHE_FORMAT_VERSION {
581 log::info!(
582 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
583 );
584 let _ = fs::remove_file(&path);
585 return None;
586 }
587
588 match postcard::from_bytes::<Self>(&data[8..]) {
590 Ok(index) => {
591 log::debug!(
592 "Loaded workspace index from cache: {} files (format v{})",
593 index.files.len(),
594 version
595 );
596 Some(index)
597 }
598 Err(e) => {
599 log::warn!("Failed to deserialize workspace index cache: {e}");
600 let _ = fs::remove_file(&path);
601 None
602 }
603 }
604 }
605
606 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
611 for deps in self.reverse_deps.values_mut() {
612 deps.remove(path);
613 }
614 self.reverse_deps.retain(|_, deps| !deps.is_empty());
616 }
617
618 fn clear_reverse_deps_for(&mut self, path: &Path) {
623 self.clear_reverse_deps_as_source(path);
625
626 self.reverse_deps.remove(path);
628 }
629
630 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
632 let source_dir = source_file.parent().unwrap_or(Path::new(""));
634
635 let target = source_dir.join(relative_target);
637
638 Self::normalize_path(&target)
640 }
641
642 fn normalize_path(path: &Path) -> PathBuf {
644 let mut components = Vec::new();
645
646 for component in path.components() {
647 match component {
648 std::path::Component::ParentDir => {
649 if !components.is_empty() {
651 components.pop();
652 }
653 }
654 std::path::Component::CurDir => {
655 }
657 _ => {
658 components.push(component);
659 }
660 }
661 }
662
663 components.iter().collect()
664 }
665}
666
667impl FileIndex {
668 pub fn new() -> Self {
670 Self::default()
671 }
672
673 pub fn with_hash(content_hash: String) -> Self {
675 Self {
676 content_hash,
677 ..Default::default()
678 }
679 }
680
681 pub fn add_heading(&mut self, heading: HeadingIndex) {
685 let index = self.headings.len();
686
687 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
689
690 if let Some(ref custom) = heading.custom_anchor {
692 self.anchor_to_heading.insert(custom.to_lowercase(), index);
693 }
694
695 self.headings.push(heading);
696 }
697
698 pub fn has_anchor(&self, anchor: &str) -> bool {
709 let lower = anchor.to_lowercase();
710
711 if self.anchor_to_heading.contains_key(&lower)
713 || self.html_anchors.contains(&lower)
714 || self.attribute_anchors.contains(&lower)
715 {
716 return true;
717 }
718
719 if anchor.contains('%') {
721 let decoded = url_decode(anchor).to_lowercase();
722 if decoded != lower {
723 return self.anchor_to_heading.contains_key(&decoded)
724 || self.html_anchors.contains(&decoded)
725 || self.attribute_anchors.contains(&decoded);
726 }
727 }
728
729 false
730 }
731
732 pub fn add_html_anchor(&mut self, anchor: String) {
734 if !anchor.is_empty() {
735 self.html_anchors.insert(anchor.to_lowercase());
736 }
737 }
738
739 pub fn add_attribute_anchor(&mut self, anchor: String) {
741 if !anchor.is_empty() {
742 self.attribute_anchors.insert(anchor.to_lowercase());
743 }
744 }
745
746 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
750 self.anchor_to_heading
751 .get(&anchor.to_lowercase())
752 .and_then(|&idx| self.headings.get(idx))
753 }
754
755 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
757 self.reference_links.push(link);
758 }
759
760 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
765 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
767 return true;
768 }
769
770 if let Some(rules) = self.line_disabled_rules.get(&line) {
772 return rules.contains("*") || rules.contains(rule_name);
773 }
774
775 false
776 }
777
778 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
780 let is_duplicate = self.cross_file_links.iter().any(|existing| {
783 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
784 });
785 if !is_duplicate {
786 self.cross_file_links.push(link);
787 }
788 }
789
790 pub fn add_defined_reference(&mut self, ref_id: String) {
792 self.defined_references.insert(ref_id);
793 }
794
795 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
797 self.defined_references.contains(ref_id)
798 }
799
800 pub fn hash_matches(&self, hash: &str) -> bool {
802 self.content_hash == hash
803 }
804
805 pub fn heading_count(&self) -> usize {
807 self.headings.len()
808 }
809
810 pub fn reference_link_count(&self) -> usize {
812 self.reference_links.len()
813 }
814}
815
816#[cfg(test)]
817mod tests {
818 use super::*;
819
820 #[test]
821 fn test_workspace_index_basic() {
822 let mut index = WorkspaceIndex::new();
823 assert_eq!(index.file_count(), 0);
824 assert_eq!(index.version(), 0);
825
826 let mut file_index = FileIndex::with_hash("abc123".to_string());
827 file_index.add_heading(HeadingIndex {
828 text: "Installation".to_string(),
829 auto_anchor: "installation".to_string(),
830 custom_anchor: None,
831 line: 1,
832 });
833
834 index.insert_file(PathBuf::from("docs/install.md"), file_index);
835 assert_eq!(index.file_count(), 1);
836 assert_eq!(index.version(), 1);
837
838 assert!(index.contains_file(Path::new("docs/install.md")));
839 assert!(!index.contains_file(Path::new("docs/other.md")));
840 }
841
842 #[test]
843 fn test_vulnerable_anchors() {
844 let mut index = WorkspaceIndex::new();
845
846 let mut file1 = FileIndex::new();
848 file1.add_heading(HeadingIndex {
849 text: "Getting Started".to_string(),
850 auto_anchor: "getting-started".to_string(),
851 custom_anchor: None,
852 line: 1,
853 });
854 index.insert_file(PathBuf::from("docs/guide.md"), file1);
855
856 let mut file2 = FileIndex::new();
858 file2.add_heading(HeadingIndex {
859 text: "Installation".to_string(),
860 auto_anchor: "installation".to_string(),
861 custom_anchor: Some("install".to_string()),
862 line: 1,
863 });
864 index.insert_file(PathBuf::from("docs/install.md"), file2);
865
866 let vulnerable = index.get_vulnerable_anchors();
867 assert_eq!(vulnerable.len(), 1);
868 assert!(vulnerable.contains_key("getting-started"));
869 assert!(!vulnerable.contains_key("installation"));
870
871 let anchors = vulnerable.get("getting-started").unwrap();
872 assert_eq!(anchors.len(), 1);
873 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
874 assert_eq!(anchors[0].text, "Getting Started");
875 }
876
877 #[test]
878 fn test_vulnerable_anchors_multiple_files_same_anchor() {
879 let mut index = WorkspaceIndex::new();
882
883 let mut file1 = FileIndex::new();
885 file1.add_heading(HeadingIndex {
886 text: "Installation".to_string(),
887 auto_anchor: "installation".to_string(),
888 custom_anchor: None,
889 line: 1,
890 });
891 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
892
893 let mut file2 = FileIndex::new();
895 file2.add_heading(HeadingIndex {
896 text: "Installation".to_string(),
897 auto_anchor: "installation".to_string(),
898 custom_anchor: None,
899 line: 5,
900 });
901 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
902
903 let mut file3 = FileIndex::new();
905 file3.add_heading(HeadingIndex {
906 text: "Installation".to_string(),
907 auto_anchor: "installation".to_string(),
908 custom_anchor: Some("install".to_string()),
909 line: 10,
910 });
911 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
912
913 let vulnerable = index.get_vulnerable_anchors();
914 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
916
917 let anchors = vulnerable.get("installation").unwrap();
918 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
920
921 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
923 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
924 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
925 }
926
927 #[test]
928 fn test_file_index_hash() {
929 let index = FileIndex::with_hash("hash123".to_string());
930 assert!(index.hash_matches("hash123"));
931 assert!(!index.hash_matches("other"));
932 }
933
934 #[test]
935 fn test_version_increment() {
936 let mut index = WorkspaceIndex::new();
937 assert_eq!(index.version(), 0);
938
939 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
940 assert_eq!(index.version(), 1);
941
942 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
943 assert_eq!(index.version(), 2);
944
945 index.remove_file(Path::new("a.md"));
946 assert_eq!(index.version(), 3);
947
948 index.remove_file(Path::new("nonexistent.md"));
950 assert_eq!(index.version(), 3);
951 }
952
953 #[test]
954 fn test_reverse_deps_basic() {
955 let mut index = WorkspaceIndex::new();
956
957 let mut file_a = FileIndex::new();
959 file_a.add_cross_file_link(CrossFileLinkIndex {
960 target_path: "b.md".to_string(),
961 fragment: "section".to_string(),
962 line: 10,
963 column: 5,
964 });
965 index.update_file(Path::new("docs/a.md"), file_a);
966
967 let dependents = index.get_dependents(Path::new("docs/b.md"));
969 assert_eq!(dependents.len(), 1);
970 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
971
972 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
974 assert!(a_dependents.is_empty());
975 }
976
977 #[test]
978 fn test_reverse_deps_multiple() {
979 let mut index = WorkspaceIndex::new();
980
981 let mut file_a = FileIndex::new();
983 file_a.add_cross_file_link(CrossFileLinkIndex {
984 target_path: "../b.md".to_string(),
985 fragment: "".to_string(),
986 line: 1,
987 column: 1,
988 });
989 index.update_file(Path::new("docs/sub/a.md"), file_a);
990
991 let mut file_c = FileIndex::new();
992 file_c.add_cross_file_link(CrossFileLinkIndex {
993 target_path: "b.md".to_string(),
994 fragment: "".to_string(),
995 line: 1,
996 column: 1,
997 });
998 index.update_file(Path::new("docs/c.md"), file_c);
999
1000 let dependents = index.get_dependents(Path::new("docs/b.md"));
1002 assert_eq!(dependents.len(), 2);
1003 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1004 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1005 }
1006
1007 #[test]
1008 fn test_reverse_deps_update_clears_old() {
1009 let mut index = WorkspaceIndex::new();
1010
1011 let mut file_a = FileIndex::new();
1013 file_a.add_cross_file_link(CrossFileLinkIndex {
1014 target_path: "b.md".to_string(),
1015 fragment: "".to_string(),
1016 line: 1,
1017 column: 1,
1018 });
1019 index.update_file(Path::new("docs/a.md"), file_a);
1020
1021 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1023
1024 let mut file_a_updated = FileIndex::new();
1026 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1027 target_path: "c.md".to_string(),
1028 fragment: "".to_string(),
1029 line: 1,
1030 column: 1,
1031 });
1032 index.update_file(Path::new("docs/a.md"), file_a_updated);
1033
1034 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1036
1037 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1039 assert_eq!(c_deps.len(), 1);
1040 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1041 }
1042
1043 #[test]
1044 fn test_reverse_deps_remove_file() {
1045 let mut index = WorkspaceIndex::new();
1046
1047 let mut file_a = FileIndex::new();
1049 file_a.add_cross_file_link(CrossFileLinkIndex {
1050 target_path: "b.md".to_string(),
1051 fragment: "".to_string(),
1052 line: 1,
1053 column: 1,
1054 });
1055 index.update_file(Path::new("docs/a.md"), file_a);
1056
1057 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1059
1060 index.remove_file(Path::new("docs/a.md"));
1062
1063 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1065 }
1066
1067 #[test]
1068 fn test_normalize_path() {
1069 let path = Path::new("docs/sub/../other.md");
1071 let normalized = WorkspaceIndex::normalize_path(path);
1072 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1073
1074 let path2 = Path::new("docs/./other.md");
1076 let normalized2 = WorkspaceIndex::normalize_path(path2);
1077 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1078
1079 let path3 = Path::new("a/b/c/../../d.md");
1081 let normalized3 = WorkspaceIndex::normalize_path(path3);
1082 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1083 }
1084
1085 #[test]
1086 fn test_clear_clears_reverse_deps() {
1087 let mut index = WorkspaceIndex::new();
1088
1089 let mut file_a = FileIndex::new();
1091 file_a.add_cross_file_link(CrossFileLinkIndex {
1092 target_path: "b.md".to_string(),
1093 fragment: "".to_string(),
1094 line: 1,
1095 column: 1,
1096 });
1097 index.update_file(Path::new("docs/a.md"), file_a);
1098
1099 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1101
1102 index.clear();
1104
1105 assert_eq!(index.file_count(), 0);
1107 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1108 }
1109
1110 #[test]
1111 fn test_is_file_stale() {
1112 let mut index = WorkspaceIndex::new();
1113
1114 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1116
1117 let file_index = FileIndex::with_hash("hash123".to_string());
1119 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1120
1121 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1123
1124 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1126 }
1127
1128 #[cfg(feature = "native")]
1129 #[test]
1130 fn test_cache_roundtrip() {
1131 use std::fs;
1132
1133 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1135 let _ = fs::remove_dir_all(&temp_dir);
1136 fs::create_dir_all(&temp_dir).unwrap();
1137
1138 let mut index = WorkspaceIndex::new();
1140
1141 let mut file1 = FileIndex::with_hash("abc123".to_string());
1142 file1.add_heading(HeadingIndex {
1143 text: "Test Heading".to_string(),
1144 auto_anchor: "test-heading".to_string(),
1145 custom_anchor: Some("test".to_string()),
1146 line: 1,
1147 });
1148 file1.add_cross_file_link(CrossFileLinkIndex {
1149 target_path: "./other.md".to_string(),
1150 fragment: "section".to_string(),
1151 line: 5,
1152 column: 3,
1153 });
1154 index.update_file(Path::new("docs/file1.md"), file1);
1155
1156 let mut file2 = FileIndex::with_hash("def456".to_string());
1157 file2.add_heading(HeadingIndex {
1158 text: "Another Heading".to_string(),
1159 auto_anchor: "another-heading".to_string(),
1160 custom_anchor: None,
1161 line: 1,
1162 });
1163 index.update_file(Path::new("docs/other.md"), file2);
1164
1165 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1167
1168 assert!(temp_dir.join("workspace_index.bin").exists());
1170
1171 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1173
1174 assert_eq!(loaded.file_count(), 2);
1176 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1177 assert!(loaded.contains_file(Path::new("docs/other.md")));
1178
1179 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1181 assert_eq!(file1_loaded.content_hash, "abc123");
1182 assert_eq!(file1_loaded.headings.len(), 1);
1183 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1184 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1185 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1186 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1187
1188 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1190 assert_eq!(dependents.len(), 1);
1191 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1192
1193 let _ = fs::remove_dir_all(&temp_dir);
1195 }
1196
1197 #[cfg(feature = "native")]
1198 #[test]
1199 fn test_cache_missing_file() {
1200 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1201 let _ = std::fs::remove_dir_all(&temp_dir);
1202
1203 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1205 assert!(result.is_none());
1206 }
1207
1208 #[cfg(feature = "native")]
1209 #[test]
1210 fn test_cache_corrupted_file() {
1211 use std::fs;
1212
1213 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1214 let _ = fs::remove_dir_all(&temp_dir);
1215 fs::create_dir_all(&temp_dir).unwrap();
1216
1217 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1219
1220 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1222 assert!(result.is_none());
1223
1224 assert!(!temp_dir.join("workspace_index.bin").exists());
1226
1227 let _ = fs::remove_dir_all(&temp_dir);
1229 }
1230
1231 #[cfg(feature = "native")]
1232 #[test]
1233 fn test_cache_invalid_magic() {
1234 use std::fs;
1235
1236 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1237 let _ = fs::remove_dir_all(&temp_dir);
1238 fs::create_dir_all(&temp_dir).unwrap();
1239
1240 let mut data = Vec::new();
1242 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1246
1247 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1249 assert!(result.is_none());
1250
1251 assert!(!temp_dir.join("workspace_index.bin").exists());
1253
1254 let _ = fs::remove_dir_all(&temp_dir);
1256 }
1257
1258 #[cfg(feature = "native")]
1259 #[test]
1260 fn test_cache_version_mismatch() {
1261 use std::fs;
1262
1263 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1264 let _ = fs::remove_dir_all(&temp_dir);
1265 fs::create_dir_all(&temp_dir).unwrap();
1266
1267 let mut data = Vec::new();
1269 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1273
1274 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1276 assert!(result.is_none());
1277
1278 assert!(!temp_dir.join("workspace_index.bin").exists());
1280
1281 let _ = fs::remove_dir_all(&temp_dir);
1283 }
1284
1285 #[cfg(feature = "native")]
1286 #[test]
1287 fn test_cache_atomic_write() {
1288 use std::fs;
1289
1290 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1292 let _ = fs::remove_dir_all(&temp_dir);
1293 fs::create_dir_all(&temp_dir).unwrap();
1294
1295 let index = WorkspaceIndex::new();
1296 index.save_to_cache(&temp_dir).expect("Failed to save");
1297
1298 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1300 assert_eq!(entries.len(), 1);
1301 assert!(temp_dir.join("workspace_index.bin").exists());
1302
1303 let _ = fs::remove_dir_all(&temp_dir);
1305 }
1306
1307 #[test]
1308 fn test_has_anchor_auto_generated() {
1309 let mut file_index = FileIndex::new();
1310 file_index.add_heading(HeadingIndex {
1311 text: "Installation Guide".to_string(),
1312 auto_anchor: "installation-guide".to_string(),
1313 custom_anchor: None,
1314 line: 1,
1315 });
1316
1317 assert!(file_index.has_anchor("installation-guide"));
1319
1320 assert!(file_index.has_anchor("Installation-Guide"));
1322 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1323
1324 assert!(!file_index.has_anchor("nonexistent"));
1326 }
1327
1328 #[test]
1329 fn test_has_anchor_custom() {
1330 let mut file_index = FileIndex::new();
1331 file_index.add_heading(HeadingIndex {
1332 text: "Installation Guide".to_string(),
1333 auto_anchor: "installation-guide".to_string(),
1334 custom_anchor: Some("install".to_string()),
1335 line: 1,
1336 });
1337
1338 assert!(file_index.has_anchor("installation-guide"));
1340
1341 assert!(file_index.has_anchor("install"));
1343 assert!(file_index.has_anchor("Install")); assert!(!file_index.has_anchor("nonexistent"));
1347 }
1348
1349 #[test]
1350 fn test_get_heading_by_anchor() {
1351 let mut file_index = FileIndex::new();
1352 file_index.add_heading(HeadingIndex {
1353 text: "Installation Guide".to_string(),
1354 auto_anchor: "installation-guide".to_string(),
1355 custom_anchor: Some("install".to_string()),
1356 line: 10,
1357 });
1358 file_index.add_heading(HeadingIndex {
1359 text: "Configuration".to_string(),
1360 auto_anchor: "configuration".to_string(),
1361 custom_anchor: None,
1362 line: 20,
1363 });
1364
1365 let heading = file_index.get_heading_by_anchor("installation-guide");
1367 assert!(heading.is_some());
1368 assert_eq!(heading.unwrap().text, "Installation Guide");
1369 assert_eq!(heading.unwrap().line, 10);
1370
1371 let heading = file_index.get_heading_by_anchor("install");
1373 assert!(heading.is_some());
1374 assert_eq!(heading.unwrap().text, "Installation Guide");
1375
1376 let heading = file_index.get_heading_by_anchor("configuration");
1378 assert!(heading.is_some());
1379 assert_eq!(heading.unwrap().text, "Configuration");
1380 assert_eq!(heading.unwrap().line, 20);
1381
1382 assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
1384 }
1385
/// Registers 100 headings, each with an auto and a custom anchor, and checks
/// that every anchor can be found and maps back to the right line.
#[test]
fn test_anchor_lookup_many_headings() {
    let mut file_index = FileIndex::new();

    // Heading `i` lives on line `i + 1` and is addressable both as
    // `heading-{i}` (auto) and `h{i}` (custom).
    for i in 0..100 {
        file_index.add_heading(HeadingIndex {
            text: format!("Heading {i}"),
            auto_anchor: format!("heading-{i}"),
            custom_anchor: Some(format!("h{i}")),
            line: i + 1,
        });
    }

    for i in 0..100 {
        // Build the auto anchor once; it is used for both lookups below.
        let auto_anchor = format!("heading-{i}");
        assert!(file_index.has_anchor(&auto_anchor));
        assert!(file_index.has_anchor(&format!("h{i}")));

        // `unwrap_or_else` + `panic!` replaces `is_some()` + `unwrap()` so a
        // failure names the anchor; the message is only formatted on failure.
        let heading = file_index
            .get_heading_by_anchor(&auto_anchor)
            .unwrap_or_else(|| panic!("auto anchor `{auto_anchor}` should resolve to a heading"));
        assert_eq!(heading.line, i + 1);
    }
}
1411
/// A single relative Markdown link is extracted with its path, an empty
/// fragment, and its line/column position.
#[test]
fn test_extract_cross_file_links_basic() {
    let markdown = "# Test\n\nSee [link](./other.md) for info.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./other.md");
    assert_eq!(link.fragment, "");
    // The link sits on the third line of the input.
    assert_eq!(link.line, 3);
    // Counting from 1, column 12 is the `.` that starts `./other.md`.
    assert_eq!(link.column, 12);
}
1431
/// A link of the form `path#fragment` is expected to be split into the
/// target path and the fragment without the leading `#`.
#[test]
fn test_extract_cross_file_links_with_fragment() {
    let markdown = "Check [guide](./guide.md#install) here.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./guide.md");
    assert_eq!(link.fragment, "install");
    assert_eq!(link.line, 1);
    // Counting from 1, column 15 is the `.` that starts `./guide.md`.
    assert_eq!(link.column, 15);
}
1447
/// Two links on one line are both extracted, in source order, each with its
/// own column.
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
    let markdown = "See [a](a.md) and [b](b.md) here.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert_eq!(found.len(), 2);

    // First link: `a.md` begins at 1-based column 9.
    let first = &found[0];
    assert_eq!(first.target_path, "a.md");
    assert_eq!(first.line, 1);
    assert_eq!(first.column, 9);

    // Second link: `b.md` begins at 1-based column 23 of the same line.
    let second = &found[1];
    assert_eq!(second.target_path, "b.md");
    assert_eq!(second.line, 1);
    assert_eq!(second.column, 23);
}
1468
/// A target wrapped in `<...>` may contain spaces and parentheses; the
/// extracted path is expected to be the text between the angle brackets.
#[test]
fn test_extract_cross_file_links_angle_brackets() {
    let markdown = "See [link](<path/with (parens).md>) here.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "path/with (parens).md");
    assert_eq!(link.line, 1);
    // Counting from 1, column 13 is the `p` right after the opening `<`.
    assert_eq!(link.column, 13);
}
1483
/// Of an `https` URL, a `mailto` URL, a relative file, a fragment-only link
/// and an absolute path, only the relative file is expected to be extracted.
#[test]
fn test_extract_cross_file_links_skips_external() {
    let markdown = r#"
[external](https://example.com)
[mailto](mailto:test@example.com)
[local](./local.md)
[fragment](#section)
[absolute](/docs/page.md)
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    // Exactly one link survives, and it is the relative one.
    assert_eq!(found.len(), 1);
    let link = &found[0];
    assert_eq!(link.target_path, "./local.md");
}
1502
/// Links to `.png` and `.pdf` files are expected to be ignored; only the
/// `.md` target is extracted.
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
    let markdown = r#"
[image](./photo.png)
[doc](./readme.md)
[pdf](./document.pdf)
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    // Exactly one link survives, and it is the Markdown file.
    assert_eq!(found.len(), 1);
    let link = &found[0];
    assert_eq!(link.target_path, "./readme.md");
}
1519
/// Link syntax inside an inline code span is expected to be ignored, while a
/// regular link on the same line is still extracted.
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
    let markdown = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    // Only the link outside the backticks is reported.
    assert_eq!(found.len(), 1);
    let link = &found[0];
    assert_eq!(link.target_path, "./file.md");
}
1532
/// A query string after the file name (`?raw=true`) is expected to be
/// dropped from the extracted target path.
#[test]
fn test_extract_cross_file_links_with_query_params() {
    let markdown = "See [doc](./file.md?raw=true) here.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert_eq!(found.len(), 1);
    let link = &found[0];
    // The reported path carries no `?raw=true` suffix.
    assert_eq!(link.target_path, "./file.md");
}
1545
/// An empty document yields no cross-file links.
#[test]
fn test_extract_cross_file_links_empty_content() {
    let context = LintContext::new("", crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert!(found.is_empty());
}
1556
/// A document with a heading and plain text but no link syntax yields no
/// cross-file links.
#[test]
fn test_extract_cross_file_links_no_links() {
    let markdown = "# Just a heading\n\nSome text without links.\n";
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    assert!(found.is_empty());
}
1567
/// Position check named after issue 234: each extracted link must report the
/// line it is on and the 1-based column where its target text begins.
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
    let markdown = r#"# Test Document

Here is a [broken link](nonexistent-file.md) that should trigger MD057.

And another [link](also-missing.md) on this line.
"#;
    let context = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&context);

    // Expected (target path, line, column) per link, in source order.
    let expected = [
        ("nonexistent-file.md", 3, 25),
        ("also-missing.md", 5, 20),
    ];
    assert_eq!(found.len(), 2);

    for (position, (path, line, column)) in expected.into_iter().enumerate() {
        let link = &found[position];
        assert_eq!(link.target_path, path);
        assert_eq!(link.line, line);
        assert_eq!(link.column, column);
    }
}
1595}