1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28
/// Converts one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) into its numeric
/// value in the range `0..=15`.
///
/// Returns `None` for every byte that is not a hexadecimal digit.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // `to_digit(16)` only ever yields 0..=15, so narrowing to `u8` is lossless.
    char::from(c).to_digit(16).map(|digit| digit as u8)
}
42
43fn url_decode(s: &str) -> String {
47 if !s.contains('%') {
49 return s.to_string();
50 }
51
52 let bytes = s.as_bytes();
53 let mut result = Vec::with_capacity(bytes.len());
54 let mut i = 0;
55
56 while i < bytes.len() {
57 if bytes[i] == b'%' && i + 2 < bytes.len() {
58 let hex1 = bytes[i + 1];
60 let hex2 = bytes[i + 2];
61 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
62 result.push(d1 * 16 + d2);
63 i += 3;
64 continue;
65 }
66 }
67 result.push(bytes[i]);
68 i += 1;
69 }
70
71 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
73}
74
/// Matches the bracketed text part of a link or image: an optional `!`
/// followed by `[...]` whose content contains no `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Matches an inline-link destination written in angle brackets:
/// `](<url>#fragment "title")`.
///
/// Capture group 1 is the text between `<` and `>`; group 2 is the optional
/// `#fragment` that directly follows the closing `>`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches a plain inline-link destination: `](url#fragment "title")`.
///
/// Capture group 1 is the URL up to (not including) the first `>`, `)`,
/// whitespace or `#`; group 2 is the optional `#fragment`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches text that begins with `scheme://`, `scheme:` or `www.`.
pub(crate) static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
99
/// File-name suffixes (lowercase, including the leading dot) that
/// `is_markdown_file` treats as Markdown documents.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
112
113#[inline]
115fn is_markdown_file(path: &str) -> bool {
116 let path_lower = path.to_lowercase();
117 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
118}
119
/// Returns the part of `url` that precedes the first `?` or `#`, i.e. the
/// path without its query string or fragment.
///
/// The result borrows from `url`; when neither delimiter occurs the whole
/// input is returned.
fn strip_query_and_fragment(url: &str) -> &str {
    // The earliest of the two delimiters marks the end of the path.
    let cut = url.find(|ch: char| ch == '?' || ch == '#');
    match cut {
        Some(end) => &url[..end],
        None => url,
    }
}
133
134pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
142 let content = ctx.content;
143
144 if content.is_empty() || !content.contains("](") {
146 return Vec::new();
147 }
148
149 let mut links = Vec::new();
150 let lines: Vec<&str> = content.lines().collect();
151 let line_index = &ctx.line_index;
152
153 let mut processed_lines = HashSet::new();
156
157 for link in &ctx.links {
158 let line_idx = link.line - 1;
159 if line_idx >= lines.len() {
160 continue;
161 }
162
163 if !processed_lines.insert(line_idx) {
165 continue;
166 }
167
168 let line = lines[line_idx];
169 if !line.contains("](") {
170 continue;
171 }
172
173 for link_match in LINK_START_REGEX.find_iter(line) {
175 let start_pos = link_match.start();
176 let end_pos = link_match.end();
177
178 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
180 let absolute_start_pos = line_start_byte + start_pos;
181
182 if ctx.is_in_code_span_byte(absolute_start_pos) {
184 continue;
185 }
186
187 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
190 .captures_at(line, end_pos - 1)
191 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
192
193 if let Some(caps) = caps_result
194 && let Some(url_group) = caps.get(1)
195 {
196 let file_path = url_group.as_str().trim();
197
198 if file_path.is_empty()
201 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
202 || file_path.starts_with("www.")
203 || file_path.starts_with('#')
204 || file_path.starts_with("{{")
205 || file_path.starts_with("{%")
206 || file_path.starts_with('/')
207 || file_path.starts_with('~')
208 || file_path.starts_with('@')
209 || (file_path.starts_with('`') && file_path.ends_with('`'))
210 {
211 continue;
212 }
213
214 let file_path = strip_query_and_fragment(file_path);
216
217 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
219
220 if is_markdown_file(file_path) {
222 links.push(CrossFileLinkIndex {
223 target_path: file_path.to_string(),
224 fragment: fragment.to_string(),
225 line: link.line,
226 column: url_group.start() + 1,
227 });
228 }
229 }
230 }
231 }
232
233 links
234}
235
/// Four-byte signature written at the start of the cache file and checked
/// when the cache is loaded.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Cache format version, stored as a little-endian `u32` right after the
/// magic bytes. A cache file carrying a different version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;

/// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
247
/// Index of the files in a workspace together with the links between them.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Reverse dependency map: link target path -> set of files that link to it.
    /// Populated by `update_file` from each file's `cross_file_links`.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Modification counter; incremented (wrapping) by mutating operations.
    version: u64,
}
262
/// Index data collected for a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings, in the order they were added via `add_heading`.
    pub headings: Vec<HeadingIndex>,
    /// Reference-style link usages recorded via `add_reference_link`.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links to other files; `add_cross_file_link` skips duplicates.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference identifiers defined in this file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content, compared by `hash_matches`.
    pub content_hash: String,
    /// Lowercased anchor -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors registered via `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors registered via `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` stands for every rule.
    pub file_disabled_rules: HashSet<String>,
    /// `(line, disabled, enabled)` transitions. `is_rule_disabled_at_line`
    /// binary-searches this by line, so entries are expected to be sorted by line.
    pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
    /// Rules disabled on individual lines, keyed by line number.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
296
/// A heading recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Automatically generated anchor for the heading.
    pub auto_anchor: String,
    /// Explicit anchor, when the heading declares one.
    pub custom_anchor: Option<String>,
    /// Line of the heading.
    pub line: usize,
    /// Whether the heading is a Setext heading; defaults to `false` when the
    /// field is absent from deserialized data.
    #[serde(default)]
    pub is_setext: bool,
}
312
/// A reference-style link usage recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier of the reference being used.
    pub reference_id: String,
    /// Line of the usage.
    pub line: usize,
    /// Column of the usage.
    pub column: usize,
}
323
/// A link from one file to another file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Target path as written in the link; `extract_cross_file_links` stores it
    /// with any query string and fragment removed.
    pub target_path: String,
    /// Fragment without the leading `#`; empty when the link has none.
    pub fragment: String,
    /// Line of the link.
    pub line: usize,
    /// Column of the link URL; `extract_cross_file_links` stores the URL's byte
    /// offset within the line plus one.
    pub column: usize,
}
336
/// A heading that has no custom anchor, as reported by
/// `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File containing the heading.
    pub file: PathBuf,
    /// Line of the heading.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
347
348impl WorkspaceIndex {
349 pub fn new() -> Self {
351 Self::default()
352 }
353
354 pub fn version(&self) -> u64 {
356 self.version
357 }
358
359 pub fn file_count(&self) -> usize {
361 self.files.len()
362 }
363
364 pub fn contains_file(&self, path: &Path) -> bool {
366 self.files.contains_key(path)
367 }
368
369 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
371 self.files.get(path)
372 }
373
374 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
376 self.files.insert(path, index);
377 self.version = self.version.wrapping_add(1);
378 }
379
380 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
382 self.clear_reverse_deps_for(path);
384
385 let result = self.files.remove(path);
386 if result.is_some() {
387 self.version = self.version.wrapping_add(1);
388 }
389 result
390 }
391
392 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
402 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
403
404 for (file_path, file_index) in &self.files {
405 for heading in &file_index.headings {
406 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
408 let anchor_key = heading.auto_anchor.to_lowercase();
409 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
410 file: file_path.clone(),
411 line: heading.line,
412 text: heading.text.clone(),
413 });
414 }
415 }
416 }
417
418 vulnerable
419 }
420
421 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
423 self.files
424 .iter()
425 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
426 }
427
428 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
430 self.files.iter().map(|(p, i)| (p.as_path(), i))
431 }
432
433 pub fn clear(&mut self) {
435 self.files.clear();
436 self.reverse_deps.clear();
437 self.version = self.version.wrapping_add(1);
438 }
439
440 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
447 self.clear_reverse_deps_as_source(path);
450
451 for link in &index.cross_file_links {
453 let target = self.resolve_target_path(path, &link.target_path);
454 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
455 }
456
457 self.files.insert(path.to_path_buf(), index);
458 self.version = self.version.wrapping_add(1);
459 }
460
461 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
466 self.reverse_deps
467 .get(path)
468 .map(|set| set.iter().cloned().collect())
469 .unwrap_or_default()
470 }
471
472 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
476 self.files
477 .get(path)
478 .map(|f| f.content_hash != current_hash)
479 .unwrap_or(true)
480 }
481
482 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
487 let before_count = self.files.len();
488
489 let to_remove: Vec<PathBuf> = self
491 .files
492 .keys()
493 .filter(|path| !current_files.contains(*path))
494 .cloned()
495 .collect();
496
497 for path in &to_remove {
499 self.remove_file(path);
500 }
501
502 before_count - self.files.len()
503 }
504
505 #[cfg(feature = "native")]
512 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
513 use std::fs;
514 use std::io::Write;
515
516 fs::create_dir_all(cache_dir)?;
518
519 let encoded = postcard::to_allocvec(self)
521 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
522
523 let mut cache_data = Vec::with_capacity(8 + encoded.len());
525 cache_data.extend_from_slice(CACHE_MAGIC);
526 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
527 cache_data.extend_from_slice(&encoded);
528
529 let final_path = cache_dir.join(CACHE_FILE_NAME);
531 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
532
533 {
535 let mut file = fs::File::create(&temp_path)?;
536 file.write_all(&cache_data)?;
537 file.sync_all()?;
538 }
539
540 fs::rename(&temp_path, &final_path)?;
542
543 log::debug!(
544 "Saved workspace index to cache: {} files, {} bytes (format v{})",
545 self.files.len(),
546 cache_data.len(),
547 CACHE_FORMAT_VERSION
548 );
549
550 Ok(())
551 }
552
553 #[cfg(feature = "native")]
561 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
562 use std::fs;
563
564 let path = cache_dir.join(CACHE_FILE_NAME);
565 let data = fs::read(&path).ok()?;
566
567 if data.len() < 8 {
569 log::warn!("Workspace index cache too small, discarding");
570 let _ = fs::remove_file(&path);
571 return None;
572 }
573
574 if &data[0..4] != CACHE_MAGIC {
576 log::warn!("Workspace index cache has invalid magic header, discarding");
577 let _ = fs::remove_file(&path);
578 return None;
579 }
580
581 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
583 if version != CACHE_FORMAT_VERSION {
584 log::info!(
585 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
586 );
587 let _ = fs::remove_file(&path);
588 return None;
589 }
590
591 match postcard::from_bytes::<Self>(&data[8..]) {
593 Ok(index) => {
594 log::debug!(
595 "Loaded workspace index from cache: {} files (format v{})",
596 index.files.len(),
597 version
598 );
599 Some(index)
600 }
601 Err(e) => {
602 log::warn!("Failed to deserialize workspace index cache: {e}");
603 let _ = fs::remove_file(&path);
604 None
605 }
606 }
607 }
608
609 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
614 for deps in self.reverse_deps.values_mut() {
615 deps.remove(path);
616 }
617 self.reverse_deps.retain(|_, deps| !deps.is_empty());
619 }
620
621 fn clear_reverse_deps_for(&mut self, path: &Path) {
626 self.clear_reverse_deps_as_source(path);
628
629 self.reverse_deps.remove(path);
631 }
632
633 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
635 let source_dir = source_file.parent().unwrap_or(Path::new(""));
637
638 let target = source_dir.join(relative_target);
640
641 Self::normalize_path(&target)
643 }
644
645 fn normalize_path(path: &Path) -> PathBuf {
647 let mut components = Vec::new();
648
649 for component in path.components() {
650 match component {
651 std::path::Component::ParentDir => {
652 if !components.is_empty() {
654 components.pop();
655 }
656 }
657 std::path::Component::CurDir => {
658 }
660 _ => {
661 components.push(component);
662 }
663 }
664 }
665
666 components.iter().collect()
667 }
668}
669
670impl FileIndex {
671 pub fn new() -> Self {
673 Self::default()
674 }
675
676 pub fn with_hash(content_hash: String) -> Self {
678 Self {
679 content_hash,
680 ..Default::default()
681 }
682 }
683
684 pub fn add_heading(&mut self, heading: HeadingIndex) {
688 let index = self.headings.len();
689
690 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
692
693 if let Some(ref custom) = heading.custom_anchor {
695 self.anchor_to_heading.insert(custom.to_lowercase(), index);
696 }
697
698 self.headings.push(heading);
699 }
700
701 pub fn add_anchor_alias(&mut self, anchor: String, heading_index: usize) {
704 if heading_index < self.headings.len() {
705 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
706 }
707 }
708
709 pub fn has_anchor(&self, anchor: &str) -> bool {
720 let lower = anchor.to_lowercase();
721
722 if self.anchor_to_heading.contains_key(&lower)
724 || self.html_anchors.contains(&lower)
725 || self.attribute_anchors.contains(&lower)
726 {
727 return true;
728 }
729
730 if anchor.contains('%') {
732 let decoded = url_decode(anchor).to_lowercase();
733 if decoded != lower {
734 return self.anchor_to_heading.contains_key(&decoded)
735 || self.html_anchors.contains(&decoded)
736 || self.attribute_anchors.contains(&decoded);
737 }
738 }
739
740 false
741 }
742
743 pub fn add_html_anchor(&mut self, anchor: String) {
745 if !anchor.is_empty() {
746 self.html_anchors.insert(anchor.to_lowercase());
747 }
748 }
749
750 pub fn add_attribute_anchor(&mut self, anchor: String) {
752 if !anchor.is_empty() {
753 self.attribute_anchors.insert(anchor.to_lowercase());
754 }
755 }
756
757 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
761 self.anchor_to_heading
762 .get(&anchor.to_lowercase())
763 .and_then(|&idx| self.headings.get(idx))
764 }
765
766 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
768 self.reference_links.push(link);
769 }
770
771 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
776 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
778 return true;
779 }
780
781 if let Some(rules) = self.line_disabled_rules.get(&line)
783 && (rules.contains("*") || rules.contains(rule_name))
784 {
785 return true;
786 }
787
788 if !self.persistent_transitions.is_empty() {
790 let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
791 Ok(i) => Some(i),
792 Err(i) => {
793 if i > 0 {
794 Some(i - 1)
795 } else {
796 None
797 }
798 }
799 };
800 if let Some(i) = idx {
801 let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
802 if disabled.contains("*") {
803 return !enabled.contains(rule_name);
804 }
805 return disabled.contains(rule_name);
806 }
807 }
808
809 false
810 }
811
812 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
814 let is_duplicate = self.cross_file_links.iter().any(|existing| {
817 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
818 });
819 if !is_duplicate {
820 self.cross_file_links.push(link);
821 }
822 }
823
824 pub fn add_defined_reference(&mut self, ref_id: String) {
826 self.defined_references.insert(ref_id);
827 }
828
829 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
831 self.defined_references.contains(ref_id)
832 }
833
834 pub fn hash_matches(&self, hash: &str) -> bool {
836 self.content_hash == hash
837 }
838
839 pub fn heading_count(&self) -> usize {
841 self.headings.len()
842 }
843
844 pub fn reference_link_count(&self) -> usize {
846 self.reference_links.len()
847 }
848}
849
850#[cfg(test)]
851mod tests {
852 use super::*;
853
#[test]
fn test_workspace_index_basic() {
    // A fresh index is empty and starts at version 0.
    let mut workspace = WorkspaceIndex::new();
    assert_eq!(workspace.file_count(), 0);
    assert_eq!(workspace.version(), 0);

    let heading = HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: None,
        line: 1,
        is_setext: false,
    };
    let mut install_index = FileIndex::with_hash(String::from("abc123"));
    install_index.add_heading(heading);

    // Inserting one file bumps both the file count and the version.
    workspace.insert_file(PathBuf::from("docs/install.md"), install_index);
    assert_eq!(workspace.file_count(), 1);
    assert_eq!(workspace.version(), 1);

    assert!(workspace.contains_file(Path::new("docs/install.md")));
    assert!(!workspace.contains_file(Path::new("docs/other.md")));
}
876
#[test]
fn test_vulnerable_anchors() {
    let mut workspace = WorkspaceIndex::new();

    // guide.md: the heading relies solely on its auto-generated anchor.
    let mut guide = FileIndex::new();
    guide.add_heading(HeadingIndex {
        text: String::from("Getting Started"),
        auto_anchor: String::from("getting-started"),
        custom_anchor: None,
        line: 1,
        is_setext: false,
    });
    workspace.insert_file(PathBuf::from("docs/guide.md"), guide);

    // install.md: the heading carries a custom anchor and must not be reported.
    let mut install = FileIndex::new();
    install.add_heading(HeadingIndex {
        text: String::from("Installation"),
        auto_anchor: String::from("installation"),
        custom_anchor: Some(String::from("install")),
        line: 1,
        is_setext: false,
    });
    workspace.insert_file(PathBuf::from("docs/install.md"), install);

    let vulnerable = workspace.get_vulnerable_anchors();
    assert_eq!(vulnerable.len(), 1);
    assert!(vulnerable.contains_key("getting-started"));
    assert!(!vulnerable.contains_key("installation"));

    let entries = &vulnerable["getting-started"];
    assert_eq!(entries.len(), 1);
    let entry = &entries[0];
    assert_eq!(entry.file, PathBuf::from("docs/guide.md"));
    assert_eq!(entry.text, "Getting Started");
}
913
#[test]
fn test_vulnerable_anchors_multiple_files_same_anchor() {
    // Builds a one-heading file whose heading is always "Installation".
    let installation_file = |custom_anchor: Option<&str>, line: usize| {
        let mut file = FileIndex::new();
        file.add_heading(HeadingIndex {
            text: String::from("Installation"),
            auto_anchor: String::from("installation"),
            custom_anchor: custom_anchor.map(String::from),
            line,
            is_setext: false,
        });
        file
    };

    let mut workspace = WorkspaceIndex::new();
    // Two translations without a custom anchor, one with.
    workspace.insert_file(PathBuf::from("docs/en/guide.md"), installation_file(None, 1));
    workspace.insert_file(PathBuf::from("docs/fr/guide.md"), installation_file(None, 5));
    workspace.insert_file(PathBuf::from("docs/de/guide.md"), installation_file(Some("install"), 10));

    let vulnerable = workspace.get_vulnerable_anchors();
    assert_eq!(vulnerable.len(), 1);
    assert!(vulnerable.contains_key("installation"));

    let entries = &vulnerable["installation"];
    assert_eq!(entries.len(), 2, "Should collect both vulnerable anchors");

    // Map iteration order is unspecified, so check membership only.
    assert!(entries.iter().any(|a| a.file.as_path() == Path::new("docs/en/guide.md")));
    assert!(entries.iter().any(|a| a.file.as_path() == Path::new("docs/fr/guide.md")));
}
966
#[test]
fn test_file_index_hash() {
    // The hash given at construction is the only one that matches.
    let file = FileIndex::with_hash(String::from("hash123"));
    assert_eq!(file.hash_matches("hash123"), true);
    assert_eq!(file.hash_matches("other"), false);
}
973
#[test]
fn test_version_increment() {
    let mut workspace = WorkspaceIndex::new();
    assert_eq!(workspace.version(), 0);

    // Every insertion bumps the version by one.
    for (expected, name) in [(1, "a.md"), (2, "b.md")] {
        workspace.insert_file(PathBuf::from(name), FileIndex::new());
        assert_eq!(workspace.version(), expected);
    }

    // Removing an existing file bumps it as well...
    workspace.remove_file(Path::new("a.md"));
    assert_eq!(workspace.version(), 3);

    // ...but removing an unknown file leaves it untouched.
    workspace.remove_file(Path::new("nonexistent.md"));
    assert_eq!(workspace.version(), 3);
}
992
#[test]
fn test_reverse_deps_basic() {
    let mut workspace = WorkspaceIndex::new();

    // docs/a.md links to b.md#section in the same directory.
    let mut source = FileIndex::new();
    source.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::from("section"),
        line: 10,
        column: 5,
    });
    workspace.update_file(Path::new("docs/a.md"), source);

    // The target knows about its single dependent...
    let dependents_of_b = workspace.get_dependents(Path::new("docs/b.md"));
    assert_eq!(dependents_of_b.len(), 1);
    assert_eq!(dependents_of_b[0], PathBuf::from("docs/a.md"));

    // ...while nothing depends on the source.
    let dependents_of_a = workspace.get_dependents(Path::new("docs/a.md"));
    assert!(dependents_of_a.is_empty());
}
1016
#[test]
fn test_reverse_deps_multiple() {
    // Builds a file with one fragment-less link at line 1, column 1.
    let file_linking_to = |target: &str| {
        let mut file = FileIndex::new();
        file.add_cross_file_link(CrossFileLinkIndex {
            target_path: target.to_string(),
            fragment: "".to_string(),
            line: 1,
            column: 1,
        });
        file
    };

    let mut workspace = WorkspaceIndex::new();
    // Both links resolve to docs/b.md, one of them through a `..` segment.
    workspace.update_file(Path::new("docs/sub/a.md"), file_linking_to("../b.md"));
    workspace.update_file(Path::new("docs/c.md"), file_linking_to("b.md"));

    let dependents = workspace.get_dependents(Path::new("docs/b.md"));
    assert_eq!(dependents.len(), 2);
    for expected in ["docs/sub/a.md", "docs/c.md"] {
        assert!(dependents.contains(&PathBuf::from(expected)));
    }
}
1046
#[test]
fn test_reverse_deps_update_clears_old() {
    // Builds a file with one fragment-less link at line 1, column 1.
    let file_linking_to = |target: &str| {
        let mut file = FileIndex::new();
        file.add_cross_file_link(CrossFileLinkIndex {
            target_path: target.to_string(),
            fragment: "".to_string(),
            line: 1,
            column: 1,
        });
        file
    };

    let source = Path::new("docs/a.md");
    let mut workspace = WorkspaceIndex::new();

    // First revision: a.md -> b.md.
    workspace.update_file(source, file_linking_to("b.md"));
    assert_eq!(workspace.get_dependents(Path::new("docs/b.md")).len(), 1);

    // Second revision: a.md -> c.md; the old edge must disappear.
    workspace.update_file(source, file_linking_to("c.md"));
    assert!(workspace.get_dependents(Path::new("docs/b.md")).is_empty());

    let dependents_of_c = workspace.get_dependents(Path::new("docs/c.md"));
    assert_eq!(dependents_of_c.len(), 1);
    assert_eq!(dependents_of_c[0], PathBuf::from("docs/a.md"));
}
1082
#[test]
fn test_reverse_deps_remove_file() {
    let source = Path::new("docs/a.md");
    let target = Path::new("docs/b.md");

    let mut linking_file = FileIndex::new();
    linking_file.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });

    let mut workspace = WorkspaceIndex::new();
    workspace.update_file(source, linking_file);
    assert_eq!(workspace.get_dependents(target).len(), 1);

    // Removing the source also removes the edge it contributed.
    workspace.remove_file(source);
    assert!(workspace.get_dependents(target).is_empty());
}
1106
#[test]
fn test_normalize_path() {
    // (input, expected) pairs: `..` cancels a segment, `.` is dropped.
    let cases = [
        ("docs/sub/../other.md", "docs/other.md"),
        ("docs/./other.md", "docs/other.md"),
        ("a/b/c/../../d.md", "a/d.md"),
    ];

    for (input, expected) in cases {
        let normalized = WorkspaceIndex::normalize_path(Path::new(input));
        assert_eq!(normalized, PathBuf::from(expected));
    }
}
1124
#[test]
fn test_clear_clears_reverse_deps() {
    let target = Path::new("docs/b.md");

    let mut linking_file = FileIndex::new();
    linking_file.add_cross_file_link(CrossFileLinkIndex {
        target_path: String::from("b.md"),
        fragment: String::new(),
        line: 1,
        column: 1,
    });

    let mut workspace = WorkspaceIndex::new();
    workspace.update_file(Path::new("docs/a.md"), linking_file);
    assert_eq!(workspace.get_dependents(target).len(), 1);

    // `clear` must wipe both the files and the dependency edges.
    workspace.clear();

    assert_eq!(workspace.file_count(), 0);
    assert!(workspace.get_dependents(target).is_empty());
}
1149
#[test]
fn test_is_file_stale() {
    let mut workspace = WorkspaceIndex::new();

    // Unknown files always count as stale.
    assert!(workspace.is_file_stale(Path::new("nonexistent.md"), "hash123"));

    let indexed = Path::new("docs/test.md");
    workspace.insert_file(indexed.to_path_buf(), FileIndex::with_hash(String::from("hash123")));

    // Same hash: fresh. Different hash: stale.
    assert!(!workspace.is_file_stale(indexed, "hash123"));
    assert!(workspace.is_file_stale(indexed, "different_hash"));
}
1167
/// Saves an index with two files to a cache directory, loads it back, and
/// checks that files, headings, links and reverse dependencies survive.
#[cfg(feature = "native")]
#[test]
fn test_cache_roundtrip() {
    use std::fs;

    // Start from a clean, test-specific directory under the system temp dir.
    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
    let _ = fs::remove_dir_all(&temp_dir);
    fs::create_dir_all(&temp_dir).unwrap();

    let mut index = WorkspaceIndex::new();

    // file1: one heading with a custom anchor and one link to ./other.md.
    let mut file1 = FileIndex::with_hash("abc123".to_string());
    file1.add_heading(HeadingIndex {
        text: "Test Heading".to_string(),
        auto_anchor: "test-heading".to_string(),
        custom_anchor: Some("test".to_string()),
        line: 1,
        is_setext: false,
    });
    file1.add_cross_file_link(CrossFileLinkIndex {
        target_path: "./other.md".to_string(),
        fragment: "section".to_string(),
        line: 5,
        column: 3,
    });
    index.update_file(Path::new("docs/file1.md"), file1);

    // file2: the link target, with a single heading.
    let mut file2 = FileIndex::with_hash("def456".to_string());
    file2.add_heading(HeadingIndex {
        text: "Another Heading".to_string(),
        auto_anchor: "another-heading".to_string(),
        custom_anchor: None,
        line: 1,
        is_setext: false,
    });
    index.update_file(Path::new("docs/other.md"), file2);

    index.save_to_cache(&temp_dir).expect("Failed to save cache");

    // The cache file must exist under its fixed name.
    assert!(temp_dir.join("workspace_index.bin").exists());

    let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");

    // Both files are present after loading.
    assert_eq!(loaded.file_count(), 2);
    assert!(loaded.contains_file(Path::new("docs/file1.md")));
    assert!(loaded.contains_file(Path::new("docs/other.md")));

    // Per-file data is preserved.
    let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
    assert_eq!(file1_loaded.content_hash, "abc123");
    assert_eq!(file1_loaded.headings.len(), 1);
    assert_eq!(file1_loaded.headings[0].text, "Test Heading");
    assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
    assert_eq!(file1_loaded.cross_file_links.len(), 1);
    assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");

    // Reverse dependencies are part of the serialized state.
    let dependents = loaded.get_dependents(Path::new("docs/other.md"));
    assert_eq!(dependents.len(), 1);
    assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));

    // Best-effort cleanup.
    let _ = fs::remove_dir_all(&temp_dir);
}
1238
/// Loading from a directory that does not exist yields `None`.
#[cfg(feature = "native")]
#[test]
fn test_cache_missing_file() {
    // Make sure the directory is absent before loading.
    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
    let _ = std::fs::remove_dir_all(&temp_dir);

    let result = WorkspaceIndex::load_from_cache(&temp_dir);
    assert!(result.is_none());
}
1249
/// A cache file shorter than the 8-byte header is rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_corrupted_file() {
    use std::fs;

    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
    let _ = fs::remove_dir_all(&temp_dir);
    fs::create_dir_all(&temp_dir).unwrap();

    // Three bytes: too short to hold magic + version.
    fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();

    let result = WorkspaceIndex::load_from_cache(&temp_dir);
    assert!(result.is_none());

    // The unusable file must have been removed.
    assert!(!temp_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&temp_dir);
}
1272
/// A cache file with the wrong magic bytes is rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_invalid_magic() {
    use std::fs;

    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
    let _ = fs::remove_dir_all(&temp_dir);
    fs::create_dir_all(&temp_dir).unwrap();

    let mut data = Vec::new();
    // Wrong magic, then a version field, then filler payload.
    data.extend_from_slice(b"XXXX");
    data.extend_from_slice(&1u32.to_le_bytes());
    data.extend_from_slice(&[0; 100]);
    fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();

    let result = WorkspaceIndex::load_from_cache(&temp_dir);
    assert!(result.is_none());

    // The unusable file must have been removed.
    assert!(!temp_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&temp_dir);
}
1299
/// A cache file with the right magic but a different format version is
/// rejected and deleted.
#[cfg(feature = "native")]
#[test]
fn test_cache_version_mismatch() {
    use std::fs;

    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
    let _ = fs::remove_dir_all(&temp_dir);
    fs::create_dir_all(&temp_dir).unwrap();

    let mut data = Vec::new();
    // Correct magic, a version that differs from the current one, filler payload.
    data.extend_from_slice(b"RWSI");
    data.extend_from_slice(&999u32.to_le_bytes());
    data.extend_from_slice(&[0; 100]);
    fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();

    let result = WorkspaceIndex::load_from_cache(&temp_dir);
    assert!(result.is_none());

    // The outdated file must have been removed.
    assert!(!temp_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&temp_dir);
}
1326
/// After a successful save only the final cache file remains in the
/// directory; no temporary file is left behind.
#[cfg(feature = "native")]
#[test]
fn test_cache_atomic_write() {
    use std::fs;

    let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
    let _ = fs::remove_dir_all(&temp_dir);
    fs::create_dir_all(&temp_dir).unwrap();

    let index = WorkspaceIndex::new();
    index.save_to_cache(&temp_dir).expect("Failed to save");

    // Exactly one directory entry: the cache file itself.
    let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
    assert_eq!(entries.len(), 1);
    assert!(temp_dir.join("workspace_index.bin").exists());

    let _ = fs::remove_dir_all(&temp_dir);
}
1348
#[test]
fn test_has_anchor_auto_generated() {
    let mut file = FileIndex::new();
    file.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: None,
        line: 1,
        is_setext: false,
    });

    // Exact match plus case-insensitive variants.
    for anchor in ["installation-guide", "Installation-Guide", "INSTALLATION-GUIDE"] {
        assert!(file.has_anchor(anchor));
    }

    // Unknown anchors are rejected.
    assert!(!file.has_anchor("nonexistent"));
}
1370
#[test]
fn test_has_anchor_custom() {
    let mut file = FileIndex::new();
    file.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: Some(String::from("install")),
        line: 1,
        is_setext: false,
    });

    // The auto anchor stays valid alongside the custom one (any casing).
    for anchor in ["installation-guide", "install", "Install"] {
        assert!(file.has_anchor(anchor));
    }

    assert!(!file.has_anchor("nonexistent"));
}
1392
#[test]
fn test_get_heading_by_anchor() {
    let mut file = FileIndex::new();
    file.add_heading(HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: Some(String::from("install")),
        line: 10,
        is_setext: false,
    });
    file.add_heading(HeadingIndex {
        text: String::from("Configuration"),
        auto_anchor: String::from("configuration"),
        custom_anchor: None,
        line: 20,
        is_setext: false,
    });

    // Lookup by auto-generated anchor.
    let by_auto = file.get_heading_by_anchor("installation-guide");
    assert!(by_auto.is_some());
    let by_auto = by_auto.unwrap();
    assert_eq!(by_auto.text, "Installation Guide");
    assert_eq!(by_auto.line, 10);

    // Lookup by custom anchor resolves to the same heading.
    let by_custom = file.get_heading_by_anchor("install");
    assert!(by_custom.is_some());
    assert_eq!(by_custom.unwrap().text, "Installation Guide");

    // Second heading.
    let second = file.get_heading_by_anchor("configuration");
    assert!(second.is_some());
    let second = second.unwrap();
    assert_eq!(second.text, "Configuration");
    assert_eq!(second.line, 20);

    // Unknown anchor.
    assert!(file.get_heading_by_anchor("nonexistent").is_none());
}
1431
1432 #[test]
1433 fn test_anchor_lookup_many_headings() {
1434 let mut file_index = FileIndex::new();
1436
1437 for i in 0..100 {
1439 file_index.add_heading(HeadingIndex {
1440 text: format!("Heading {i}"),
1441 auto_anchor: format!("heading-{i}"),
1442 custom_anchor: Some(format!("h{i}")),
1443 line: i + 1,
1444 is_setext: false,
1445 });
1446 }
1447
1448 for i in 0..100 {
1450 assert!(file_index.has_anchor(&format!("heading-{i}")));
1451 assert!(file_index.has_anchor(&format!("h{i}")));
1452
1453 let heading = file_index.get_heading_by_anchor(&format!("heading-{i}"));
1454 assert!(heading.is_some());
1455 assert_eq!(heading.unwrap().line, i + 1);
1456 }
1457 }
1458
1459 #[test]
1464 fn test_extract_cross_file_links_basic() {
1465 use crate::config::MarkdownFlavor;
1466
1467 let content = "# Test\n\nSee [link](./other.md) for info.\n";
1468 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1469 let links = extract_cross_file_links(&ctx);
1470
1471 assert_eq!(links.len(), 1);
1472 assert_eq!(links[0].target_path, "./other.md");
1473 assert_eq!(links[0].fragment, "");
1474 assert_eq!(links[0].line, 3);
1475 assert_eq!(links[0].column, 12);
1477 }
1478
1479 #[test]
1480 fn test_extract_cross_file_links_with_fragment() {
1481 use crate::config::MarkdownFlavor;
1482
1483 let content = "Check [guide](./guide.md#install) here.\n";
1484 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1485 let links = extract_cross_file_links(&ctx);
1486
1487 assert_eq!(links.len(), 1);
1488 assert_eq!(links[0].target_path, "./guide.md");
1489 assert_eq!(links[0].fragment, "install");
1490 assert_eq!(links[0].line, 1);
1491 assert_eq!(links[0].column, 15);
1493 }
1494
1495 #[test]
1496 fn test_extract_cross_file_links_multiple_on_same_line() {
1497 use crate::config::MarkdownFlavor;
1498
1499 let content = "See [a](a.md) and [b](b.md) here.\n";
1500 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1501 let links = extract_cross_file_links(&ctx);
1502
1503 assert_eq!(links.len(), 2);
1504
1505 assert_eq!(links[0].target_path, "a.md");
1506 assert_eq!(links[0].line, 1);
1507 assert_eq!(links[0].column, 9);
1509
1510 assert_eq!(links[1].target_path, "b.md");
1511 assert_eq!(links[1].line, 1);
1512 assert_eq!(links[1].column, 23);
1514 }
1515
1516 #[test]
1517 fn test_extract_cross_file_links_angle_brackets() {
1518 use crate::config::MarkdownFlavor;
1519
1520 let content = "See [link](<path/with (parens).md>) here.\n";
1521 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1522 let links = extract_cross_file_links(&ctx);
1523
1524 assert_eq!(links.len(), 1);
1525 assert_eq!(links[0].target_path, "path/with (parens).md");
1526 assert_eq!(links[0].line, 1);
1527 assert_eq!(links[0].column, 13);
1529 }
1530
1531 #[test]
1532 fn test_extract_cross_file_links_skips_external() {
1533 use crate::config::MarkdownFlavor;
1534
1535 let content = r#"
1536[external](https://example.com)
1537[mailto](mailto:test@example.com)
1538[local](./local.md)
1539[fragment](#section)
1540[absolute](/docs/page.md)
1541"#;
1542 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1543 let links = extract_cross_file_links(&ctx);
1544
1545 assert_eq!(links.len(), 1);
1547 assert_eq!(links[0].target_path, "./local.md");
1548 }
1549
1550 #[test]
1551 fn test_extract_cross_file_links_skips_non_markdown() {
1552 use crate::config::MarkdownFlavor;
1553
1554 let content = r#"
1555[image](./photo.png)
1556[doc](./readme.md)
1557[pdf](./document.pdf)
1558"#;
1559 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1560 let links = extract_cross_file_links(&ctx);
1561
1562 assert_eq!(links.len(), 1);
1564 assert_eq!(links[0].target_path, "./readme.md");
1565 }
1566
1567 #[test]
1568 fn test_extract_cross_file_links_skips_code_spans() {
1569 use crate::config::MarkdownFlavor;
1570
1571 let content = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
1572 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1573 let links = extract_cross_file_links(&ctx);
1574
1575 assert_eq!(links.len(), 1);
1577 assert_eq!(links[0].target_path, "./file.md");
1578 }
1579
1580 #[test]
1581 fn test_extract_cross_file_links_with_query_params() {
1582 use crate::config::MarkdownFlavor;
1583
1584 let content = "See [doc](./file.md?raw=true) here.\n";
1585 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1586 let links = extract_cross_file_links(&ctx);
1587
1588 assert_eq!(links.len(), 1);
1589 assert_eq!(links[0].target_path, "./file.md");
1591 }
1592
1593 #[test]
1594 fn test_extract_cross_file_links_empty_content() {
1595 use crate::config::MarkdownFlavor;
1596
1597 let content = "";
1598 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1599 let links = extract_cross_file_links(&ctx);
1600
1601 assert!(links.is_empty());
1602 }
1603
1604 #[test]
1605 fn test_extract_cross_file_links_no_links() {
1606 use crate::config::MarkdownFlavor;
1607
1608 let content = "# Just a heading\n\nSome text without links.\n";
1609 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1610 let links = extract_cross_file_links(&ctx);
1611
1612 assert!(links.is_empty());
1613 }
1614
1615 #[test]
1616 fn test_extract_cross_file_links_position_accuracy_issue_234() {
1617 use crate::config::MarkdownFlavor;
1620
1621 let content = r#"# Test Document
1622
1623Here is a [broken link](nonexistent-file.md) that should trigger MD057.
1624
1625And another [link](also-missing.md) on this line.
1626"#;
1627 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1628 let links = extract_cross_file_links(&ctx);
1629
1630 assert_eq!(links.len(), 2);
1631
1632 assert_eq!(links[0].target_path, "nonexistent-file.md");
1634 assert_eq!(links[0].line, 3);
1635 assert_eq!(links[0].column, 25);
1636
1637 assert_eq!(links[1].target_path, "also-missing.md");
1639 assert_eq!(links[1].line, 5);
1640 assert_eq!(links[1].column, 20);
1641 }
1642}