1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
/// Returns the numeric value (0-15) of a single ASCII hexadecimal digit.
///
/// Both lowercase (`a-f`) and uppercase (`A-F`) letters are accepted. Any byte
/// that is not a hexadecimal digit yields `None`.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    // `to_digit(16)` only recognises the ASCII digits 0-9, a-f and A-F, and its
    // result is always below 16, so the narrowing conversion cannot fail.
    char::from(c).to_digit(16).and_then(|digit| u8::try_from(digit).ok())
}
43
44fn url_decode(s: &str) -> String {
48 if !s.contains('%') {
50 return s.to_string();
51 }
52
53 let bytes = s.as_bytes();
54 let mut result = Vec::with_capacity(bytes.len());
55 let mut i = 0;
56
57 while i < bytes.len() {
58 if bytes[i] == b'%' && i + 2 < bytes.len() {
59 let hex1 = bytes[i + 1];
61 let hex2 = bytes[i + 2];
62 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
63 result.push(d1 * 16 + d2);
64 i += 3;
65 continue;
66 }
67 }
68 result.push(bytes[i]);
69 i += 1;
70 }
71
72 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
74}
75
/// Matches the bracketed text part of an inline link or image: an optional
/// leading `!`, then `[`, any run of characters other than `]`, then `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
86
/// Matches a link destination written in angle brackets: `](<target>)`.
///
/// Capture group 1 is everything between `<` and `>`. Capture group 2 is an
/// optional `#fragment` that directly follows the closing `>`. An optional
/// double-quoted title may precede the closing `)`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
91
/// Matches a plain (non-angle-bracket) link destination: `](target#fragment "title")`.
///
/// Capture group 1 is the target up to the first `>`, `)`, whitespace or `#`.
/// Capture group 2 is the optional `#fragment` (including the `#`). An optional
/// double-quoted title may precede the closing `)`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
96
/// Matches strings that begin with a URL scheme followed by `://`, a scheme
/// followed by a bare `:`, or the literal prefix `www.`.
///
/// Used in this file to skip link targets that are not local file paths.
pub(crate) static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
100
/// File-name suffixes (lowercase, including the leading dot) that
/// `is_markdown_file` treats as Markdown.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
113
114#[inline]
116fn is_markdown_file(path: &str) -> bool {
117 let path_lower = path.to_lowercase();
118 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
119}
120
/// Returns the part of `url` that precedes the first `?` or `#`.
///
/// When neither character is present the whole input is returned.
fn strip_query_and_fragment(url: &str) -> &str {
    // Whichever of '?' / '#' comes first marks the end of the path.
    match url.find(['?', '#']) {
        Some(cut) => &url[..cut],
        None => url,
    }
}
134
135pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
143 let content = ctx.content;
144
145 if content.is_empty() || !content.contains("](") {
147 return Vec::new();
148 }
149
150 let mut links = Vec::new();
151 let lines: Vec<&str> = content.lines().collect();
152 let element_cache = ElementCache::new(content);
153 let line_index = &ctx.line_index;
154
155 let mut processed_lines = HashSet::new();
158
159 for link in &ctx.links {
160 let line_idx = link.line - 1;
161 if line_idx >= lines.len() {
162 continue;
163 }
164
165 if !processed_lines.insert(line_idx) {
167 continue;
168 }
169
170 let line = lines[line_idx];
171 if !line.contains("](") {
172 continue;
173 }
174
175 for link_match in LINK_START_REGEX.find_iter(line) {
177 let start_pos = link_match.start();
178 let end_pos = link_match.end();
179
180 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
182 let absolute_start_pos = line_start_byte + start_pos;
183
184 if element_cache.is_in_code_span(absolute_start_pos) {
186 continue;
187 }
188
189 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
192 .captures_at(line, end_pos - 1)
193 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
194
195 if let Some(caps) = caps_result
196 && let Some(url_group) = caps.get(1)
197 {
198 let file_path = url_group.as_str().trim();
199
200 if file_path.is_empty()
203 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
204 || file_path.starts_with("www.")
205 || file_path.starts_with('#')
206 || file_path.starts_with("{{")
207 || file_path.starts_with("{%")
208 || file_path.starts_with('/')
209 || file_path.starts_with('~')
210 || file_path.starts_with('@')
211 || (file_path.starts_with('`') && file_path.ends_with('`'))
212 {
213 continue;
214 }
215
216 let file_path = strip_query_and_fragment(file_path);
218
219 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
221
222 if is_markdown_file(file_path) {
224 links.push(CrossFileLinkIndex {
225 target_path: file_path.to_string(),
226 fragment: fragment.to_string(),
227 line: link.line,
228 column: url_group.start() + 1,
229 });
230 }
231 }
232 }
233 }
234
235 links
236}
237
/// Four-byte signature written at the start of the cache file and verified on load.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Cache format version, stored as a little-endian `u32` right after the magic.
/// A cache file with a different version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;

/// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
249
/// Index of all known files in a workspace.
///
/// Holds the per-file data, a reverse dependency map for cross-file links and
/// a change counter. The struct is serialized as a whole by the cache
/// functions (postcard encoding).
// NOTE(review): postcard's encoding is presumably positional, so reordering
// fields likely requires bumping `CACHE_FORMAT_VERSION` — confirm.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Resolved link target -> files that contain a cross-file link to it.
    /// Filled by `update_file`.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Counter incremented (wrapping) when files are inserted, updated,
    /// removed or the index is cleared.
    version: u64,
}
264
/// Index data collected for a single file.
// NOTE(review): this struct is part of the serialized cache; field order is
// presumably significant for the postcard encoding — confirm before reordering.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings in the order they were added via `add_heading`.
    pub headings: Vec<HeadingIndex>,
    /// Reference links recorded via `add_reference_link`.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links to other files; `add_cross_file_link` skips entries with the
    /// same target, fragment and line as an existing one.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference ids that have a definition in this file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content, compared against a fresh hash to detect staleness.
    pub content_hash: String,
    /// Lowercased anchor (auto, custom or alias) -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors registered via `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors registered via `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` disables every rule.
    pub file_disabled_rules: HashSet<String>,
    /// `(line, disabled rules, enabled rules)` entries. Lookups binary-search
    /// on the line, so the vector is expected to be sorted by line. The entry
    /// at or before a queried line decides: if `disabled` contains `"*"`, a
    /// rule is disabled unless it is listed in `enabled`; otherwise it is
    /// disabled only if listed in `disabled`.
    pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
    /// Line -> rules disabled on exactly that line; `"*"` disables every rule.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
298
/// A single heading recorded in a `FileIndex`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Anchor derived from the heading (presumably auto-generated from `text`
    /// — confirm against the producer). May be empty.
    pub auto_anchor: String,
    /// Explicit anchor attached to the heading, if any.
    pub custom_anchor: Option<String>,
    /// Line of the heading in its file.
    pub line: usize,
    /// Presumably true for setext-style (underlined) headings — confirm.
    /// Defaults to `false` when absent from deserialized data.
    #[serde(default)]
    pub is_setext: bool,
}
314
/// A reference-style link usage recorded in a `FileIndex`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier of the reference the link uses.
    pub reference_id: String,
    /// Line of the link in its file.
    pub line: usize,
    /// Column of the link on that line.
    pub column: usize,
}
325
/// A link from one file to another file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Link target as written in the source file (relative to that file).
    /// `extract_cross_file_links` stores it without query and fragment.
    pub target_path: String,
    /// Fragment without the leading `#`; empty when the link has none.
    pub fragment: String,
    /// Line of the link in the source file.
    pub line: usize,
    /// Column of the link target on that line. `extract_cross_file_links`
    /// stores the 1-based byte offset of the target within the line.
    pub column: usize,
}
338
/// A heading that is reachable only through its auto anchor (it has no custom
/// anchor), as collected by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File that contains the heading.
    pub file: PathBuf,
    /// Line of the heading in that file.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
349
350impl WorkspaceIndex {
351 pub fn new() -> Self {
353 Self::default()
354 }
355
356 pub fn version(&self) -> u64 {
358 self.version
359 }
360
361 pub fn file_count(&self) -> usize {
363 self.files.len()
364 }
365
366 pub fn contains_file(&self, path: &Path) -> bool {
368 self.files.contains_key(path)
369 }
370
371 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
373 self.files.get(path)
374 }
375
376 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
378 self.files.insert(path, index);
379 self.version = self.version.wrapping_add(1);
380 }
381
382 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
384 self.clear_reverse_deps_for(path);
386
387 let result = self.files.remove(path);
388 if result.is_some() {
389 self.version = self.version.wrapping_add(1);
390 }
391 result
392 }
393
394 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
404 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
405
406 for (file_path, file_index) in &self.files {
407 for heading in &file_index.headings {
408 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
410 let anchor_key = heading.auto_anchor.to_lowercase();
411 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
412 file: file_path.clone(),
413 line: heading.line,
414 text: heading.text.clone(),
415 });
416 }
417 }
418 }
419
420 vulnerable
421 }
422
423 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
425 self.files
426 .iter()
427 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
428 }
429
430 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
432 self.files.iter().map(|(p, i)| (p.as_path(), i))
433 }
434
435 pub fn clear(&mut self) {
437 self.files.clear();
438 self.reverse_deps.clear();
439 self.version = self.version.wrapping_add(1);
440 }
441
442 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
449 self.clear_reverse_deps_as_source(path);
452
453 for link in &index.cross_file_links {
455 let target = self.resolve_target_path(path, &link.target_path);
456 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
457 }
458
459 self.files.insert(path.to_path_buf(), index);
460 self.version = self.version.wrapping_add(1);
461 }
462
463 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
468 self.reverse_deps
469 .get(path)
470 .map(|set| set.iter().cloned().collect())
471 .unwrap_or_default()
472 }
473
474 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
478 self.files
479 .get(path)
480 .map(|f| f.content_hash != current_hash)
481 .unwrap_or(true)
482 }
483
484 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
489 let before_count = self.files.len();
490
491 let to_remove: Vec<PathBuf> = self
493 .files
494 .keys()
495 .filter(|path| !current_files.contains(*path))
496 .cloned()
497 .collect();
498
499 for path in &to_remove {
501 self.remove_file(path);
502 }
503
504 before_count - self.files.len()
505 }
506
507 #[cfg(feature = "native")]
514 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
515 use std::fs;
516 use std::io::Write;
517
518 fs::create_dir_all(cache_dir)?;
520
521 let encoded = postcard::to_allocvec(self)
523 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
524
525 let mut cache_data = Vec::with_capacity(8 + encoded.len());
527 cache_data.extend_from_slice(CACHE_MAGIC);
528 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
529 cache_data.extend_from_slice(&encoded);
530
531 let final_path = cache_dir.join(CACHE_FILE_NAME);
533 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
534
535 {
537 let mut file = fs::File::create(&temp_path)?;
538 file.write_all(&cache_data)?;
539 file.sync_all()?;
540 }
541
542 fs::rename(&temp_path, &final_path)?;
544
545 log::debug!(
546 "Saved workspace index to cache: {} files, {} bytes (format v{})",
547 self.files.len(),
548 cache_data.len(),
549 CACHE_FORMAT_VERSION
550 );
551
552 Ok(())
553 }
554
555 #[cfg(feature = "native")]
563 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
564 use std::fs;
565
566 let path = cache_dir.join(CACHE_FILE_NAME);
567 let data = fs::read(&path).ok()?;
568
569 if data.len() < 8 {
571 log::warn!("Workspace index cache too small, discarding");
572 let _ = fs::remove_file(&path);
573 return None;
574 }
575
576 if &data[0..4] != CACHE_MAGIC {
578 log::warn!("Workspace index cache has invalid magic header, discarding");
579 let _ = fs::remove_file(&path);
580 return None;
581 }
582
583 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
585 if version != CACHE_FORMAT_VERSION {
586 log::info!(
587 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
588 );
589 let _ = fs::remove_file(&path);
590 return None;
591 }
592
593 match postcard::from_bytes::<Self>(&data[8..]) {
595 Ok(index) => {
596 log::debug!(
597 "Loaded workspace index from cache: {} files (format v{})",
598 index.files.len(),
599 version
600 );
601 Some(index)
602 }
603 Err(e) => {
604 log::warn!("Failed to deserialize workspace index cache: {e}");
605 let _ = fs::remove_file(&path);
606 None
607 }
608 }
609 }
610
611 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
616 for deps in self.reverse_deps.values_mut() {
617 deps.remove(path);
618 }
619 self.reverse_deps.retain(|_, deps| !deps.is_empty());
621 }
622
623 fn clear_reverse_deps_for(&mut self, path: &Path) {
628 self.clear_reverse_deps_as_source(path);
630
631 self.reverse_deps.remove(path);
633 }
634
635 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
637 let source_dir = source_file.parent().unwrap_or(Path::new(""));
639
640 let target = source_dir.join(relative_target);
642
643 Self::normalize_path(&target)
645 }
646
647 fn normalize_path(path: &Path) -> PathBuf {
649 let mut components = Vec::new();
650
651 for component in path.components() {
652 match component {
653 std::path::Component::ParentDir => {
654 if !components.is_empty() {
656 components.pop();
657 }
658 }
659 std::path::Component::CurDir => {
660 }
662 _ => {
663 components.push(component);
664 }
665 }
666 }
667
668 components.iter().collect()
669 }
670}
671
672impl FileIndex {
673 pub fn new() -> Self {
675 Self::default()
676 }
677
678 pub fn with_hash(content_hash: String) -> Self {
680 Self {
681 content_hash,
682 ..Default::default()
683 }
684 }
685
686 pub fn add_heading(&mut self, heading: HeadingIndex) {
690 let index = self.headings.len();
691
692 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
694
695 if let Some(ref custom) = heading.custom_anchor {
697 self.anchor_to_heading.insert(custom.to_lowercase(), index);
698 }
699
700 self.headings.push(heading);
701 }
702
703 pub fn add_anchor_alias(&mut self, anchor: String, heading_index: usize) {
706 if heading_index < self.headings.len() {
707 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
708 }
709 }
710
711 pub fn has_anchor(&self, anchor: &str) -> bool {
722 let lower = anchor.to_lowercase();
723
724 if self.anchor_to_heading.contains_key(&lower)
726 || self.html_anchors.contains(&lower)
727 || self.attribute_anchors.contains(&lower)
728 {
729 return true;
730 }
731
732 if anchor.contains('%') {
734 let decoded = url_decode(anchor).to_lowercase();
735 if decoded != lower {
736 return self.anchor_to_heading.contains_key(&decoded)
737 || self.html_anchors.contains(&decoded)
738 || self.attribute_anchors.contains(&decoded);
739 }
740 }
741
742 false
743 }
744
745 pub fn add_html_anchor(&mut self, anchor: String) {
747 if !anchor.is_empty() {
748 self.html_anchors.insert(anchor.to_lowercase());
749 }
750 }
751
752 pub fn add_attribute_anchor(&mut self, anchor: String) {
754 if !anchor.is_empty() {
755 self.attribute_anchors.insert(anchor.to_lowercase());
756 }
757 }
758
759 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
763 self.anchor_to_heading
764 .get(&anchor.to_lowercase())
765 .and_then(|&idx| self.headings.get(idx))
766 }
767
768 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
770 self.reference_links.push(link);
771 }
772
773 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
778 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
780 return true;
781 }
782
783 if let Some(rules) = self.line_disabled_rules.get(&line)
785 && (rules.contains("*") || rules.contains(rule_name))
786 {
787 return true;
788 }
789
790 if !self.persistent_transitions.is_empty() {
792 let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
793 Ok(i) => Some(i),
794 Err(i) => {
795 if i > 0 {
796 Some(i - 1)
797 } else {
798 None
799 }
800 }
801 };
802 if let Some(i) = idx {
803 let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
804 if disabled.contains("*") {
805 return !enabled.contains(rule_name);
806 }
807 return disabled.contains(rule_name);
808 }
809 }
810
811 false
812 }
813
814 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
816 let is_duplicate = self.cross_file_links.iter().any(|existing| {
819 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
820 });
821 if !is_duplicate {
822 self.cross_file_links.push(link);
823 }
824 }
825
826 pub fn add_defined_reference(&mut self, ref_id: String) {
828 self.defined_references.insert(ref_id);
829 }
830
831 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
833 self.defined_references.contains(ref_id)
834 }
835
836 pub fn hash_matches(&self, hash: &str) -> bool {
838 self.content_hash == hash
839 }
840
841 pub fn heading_count(&self) -> usize {
843 self.headings.len()
844 }
845
846 pub fn reference_link_count(&self) -> usize {
848 self.reference_links.len()
849 }
850}
851
852#[cfg(test)]
853mod tests {
854 use super::*;
855
856 #[test]
857 fn test_workspace_index_basic() {
858 let mut index = WorkspaceIndex::new();
859 assert_eq!(index.file_count(), 0);
860 assert_eq!(index.version(), 0);
861
862 let mut file_index = FileIndex::with_hash("abc123".to_string());
863 file_index.add_heading(HeadingIndex {
864 text: "Installation".to_string(),
865 auto_anchor: "installation".to_string(),
866 custom_anchor: None,
867 line: 1,
868 is_setext: false,
869 });
870
871 index.insert_file(PathBuf::from("docs/install.md"), file_index);
872 assert_eq!(index.file_count(), 1);
873 assert_eq!(index.version(), 1);
874
875 assert!(index.contains_file(Path::new("docs/install.md")));
876 assert!(!index.contains_file(Path::new("docs/other.md")));
877 }
878
879 #[test]
880 fn test_vulnerable_anchors() {
881 let mut index = WorkspaceIndex::new();
882
883 let mut file1 = FileIndex::new();
885 file1.add_heading(HeadingIndex {
886 text: "Getting Started".to_string(),
887 auto_anchor: "getting-started".to_string(),
888 custom_anchor: None,
889 line: 1,
890 is_setext: false,
891 });
892 index.insert_file(PathBuf::from("docs/guide.md"), file1);
893
894 let mut file2 = FileIndex::new();
896 file2.add_heading(HeadingIndex {
897 text: "Installation".to_string(),
898 auto_anchor: "installation".to_string(),
899 custom_anchor: Some("install".to_string()),
900 line: 1,
901 is_setext: false,
902 });
903 index.insert_file(PathBuf::from("docs/install.md"), file2);
904
905 let vulnerable = index.get_vulnerable_anchors();
906 assert_eq!(vulnerable.len(), 1);
907 assert!(vulnerable.contains_key("getting-started"));
908 assert!(!vulnerable.contains_key("installation"));
909
910 let anchors = vulnerable.get("getting-started").unwrap();
911 assert_eq!(anchors.len(), 1);
912 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
913 assert_eq!(anchors[0].text, "Getting Started");
914 }
915
916 #[test]
917 fn test_vulnerable_anchors_multiple_files_same_anchor() {
918 let mut index = WorkspaceIndex::new();
921
922 let mut file1 = FileIndex::new();
924 file1.add_heading(HeadingIndex {
925 text: "Installation".to_string(),
926 auto_anchor: "installation".to_string(),
927 custom_anchor: None,
928 line: 1,
929 is_setext: false,
930 });
931 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
932
933 let mut file2 = FileIndex::new();
935 file2.add_heading(HeadingIndex {
936 text: "Installation".to_string(),
937 auto_anchor: "installation".to_string(),
938 custom_anchor: None,
939 line: 5,
940 is_setext: false,
941 });
942 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
943
944 let mut file3 = FileIndex::new();
946 file3.add_heading(HeadingIndex {
947 text: "Installation".to_string(),
948 auto_anchor: "installation".to_string(),
949 custom_anchor: Some("install".to_string()),
950 line: 10,
951 is_setext: false,
952 });
953 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
954
955 let vulnerable = index.get_vulnerable_anchors();
956 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
958
959 let anchors = vulnerable.get("installation").unwrap();
960 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
962
963 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
965 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
966 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
967 }
968
969 #[test]
970 fn test_file_index_hash() {
971 let index = FileIndex::with_hash("hash123".to_string());
972 assert!(index.hash_matches("hash123"));
973 assert!(!index.hash_matches("other"));
974 }
975
976 #[test]
977 fn test_version_increment() {
978 let mut index = WorkspaceIndex::new();
979 assert_eq!(index.version(), 0);
980
981 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
982 assert_eq!(index.version(), 1);
983
984 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
985 assert_eq!(index.version(), 2);
986
987 index.remove_file(Path::new("a.md"));
988 assert_eq!(index.version(), 3);
989
990 index.remove_file(Path::new("nonexistent.md"));
992 assert_eq!(index.version(), 3);
993 }
994
995 #[test]
996 fn test_reverse_deps_basic() {
997 let mut index = WorkspaceIndex::new();
998
999 let mut file_a = FileIndex::new();
1001 file_a.add_cross_file_link(CrossFileLinkIndex {
1002 target_path: "b.md".to_string(),
1003 fragment: "section".to_string(),
1004 line: 10,
1005 column: 5,
1006 });
1007 index.update_file(Path::new("docs/a.md"), file_a);
1008
1009 let dependents = index.get_dependents(Path::new("docs/b.md"));
1011 assert_eq!(dependents.len(), 1);
1012 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
1013
1014 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
1016 assert!(a_dependents.is_empty());
1017 }
1018
1019 #[test]
1020 fn test_reverse_deps_multiple() {
1021 let mut index = WorkspaceIndex::new();
1022
1023 let mut file_a = FileIndex::new();
1025 file_a.add_cross_file_link(CrossFileLinkIndex {
1026 target_path: "../b.md".to_string(),
1027 fragment: "".to_string(),
1028 line: 1,
1029 column: 1,
1030 });
1031 index.update_file(Path::new("docs/sub/a.md"), file_a);
1032
1033 let mut file_c = FileIndex::new();
1034 file_c.add_cross_file_link(CrossFileLinkIndex {
1035 target_path: "b.md".to_string(),
1036 fragment: "".to_string(),
1037 line: 1,
1038 column: 1,
1039 });
1040 index.update_file(Path::new("docs/c.md"), file_c);
1041
1042 let dependents = index.get_dependents(Path::new("docs/b.md"));
1044 assert_eq!(dependents.len(), 2);
1045 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1046 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1047 }
1048
1049 #[test]
1050 fn test_reverse_deps_update_clears_old() {
1051 let mut index = WorkspaceIndex::new();
1052
1053 let mut file_a = FileIndex::new();
1055 file_a.add_cross_file_link(CrossFileLinkIndex {
1056 target_path: "b.md".to_string(),
1057 fragment: "".to_string(),
1058 line: 1,
1059 column: 1,
1060 });
1061 index.update_file(Path::new("docs/a.md"), file_a);
1062
1063 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1065
1066 let mut file_a_updated = FileIndex::new();
1068 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1069 target_path: "c.md".to_string(),
1070 fragment: "".to_string(),
1071 line: 1,
1072 column: 1,
1073 });
1074 index.update_file(Path::new("docs/a.md"), file_a_updated);
1075
1076 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1078
1079 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1081 assert_eq!(c_deps.len(), 1);
1082 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1083 }
1084
1085 #[test]
1086 fn test_reverse_deps_remove_file() {
1087 let mut index = WorkspaceIndex::new();
1088
1089 let mut file_a = FileIndex::new();
1091 file_a.add_cross_file_link(CrossFileLinkIndex {
1092 target_path: "b.md".to_string(),
1093 fragment: "".to_string(),
1094 line: 1,
1095 column: 1,
1096 });
1097 index.update_file(Path::new("docs/a.md"), file_a);
1098
1099 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1101
1102 index.remove_file(Path::new("docs/a.md"));
1104
1105 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1107 }
1108
1109 #[test]
1110 fn test_normalize_path() {
1111 let path = Path::new("docs/sub/../other.md");
1113 let normalized = WorkspaceIndex::normalize_path(path);
1114 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1115
1116 let path2 = Path::new("docs/./other.md");
1118 let normalized2 = WorkspaceIndex::normalize_path(path2);
1119 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1120
1121 let path3 = Path::new("a/b/c/../../d.md");
1123 let normalized3 = WorkspaceIndex::normalize_path(path3);
1124 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1125 }
1126
1127 #[test]
1128 fn test_clear_clears_reverse_deps() {
1129 let mut index = WorkspaceIndex::new();
1130
1131 let mut file_a = FileIndex::new();
1133 file_a.add_cross_file_link(CrossFileLinkIndex {
1134 target_path: "b.md".to_string(),
1135 fragment: "".to_string(),
1136 line: 1,
1137 column: 1,
1138 });
1139 index.update_file(Path::new("docs/a.md"), file_a);
1140
1141 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1143
1144 index.clear();
1146
1147 assert_eq!(index.file_count(), 0);
1149 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1150 }
1151
1152 #[test]
1153 fn test_is_file_stale() {
1154 let mut index = WorkspaceIndex::new();
1155
1156 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1158
1159 let file_index = FileIndex::with_hash("hash123".to_string());
1161 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1162
1163 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1165
1166 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1168 }
1169
1170 #[cfg(feature = "native")]
1171 #[test]
1172 fn test_cache_roundtrip() {
1173 use std::fs;
1174
1175 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1177 let _ = fs::remove_dir_all(&temp_dir);
1178 fs::create_dir_all(&temp_dir).unwrap();
1179
1180 let mut index = WorkspaceIndex::new();
1182
1183 let mut file1 = FileIndex::with_hash("abc123".to_string());
1184 file1.add_heading(HeadingIndex {
1185 text: "Test Heading".to_string(),
1186 auto_anchor: "test-heading".to_string(),
1187 custom_anchor: Some("test".to_string()),
1188 line: 1,
1189 is_setext: false,
1190 });
1191 file1.add_cross_file_link(CrossFileLinkIndex {
1192 target_path: "./other.md".to_string(),
1193 fragment: "section".to_string(),
1194 line: 5,
1195 column: 3,
1196 });
1197 index.update_file(Path::new("docs/file1.md"), file1);
1198
1199 let mut file2 = FileIndex::with_hash("def456".to_string());
1200 file2.add_heading(HeadingIndex {
1201 text: "Another Heading".to_string(),
1202 auto_anchor: "another-heading".to_string(),
1203 custom_anchor: None,
1204 line: 1,
1205 is_setext: false,
1206 });
1207 index.update_file(Path::new("docs/other.md"), file2);
1208
1209 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1211
1212 assert!(temp_dir.join("workspace_index.bin").exists());
1214
1215 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1217
1218 assert_eq!(loaded.file_count(), 2);
1220 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1221 assert!(loaded.contains_file(Path::new("docs/other.md")));
1222
1223 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1225 assert_eq!(file1_loaded.content_hash, "abc123");
1226 assert_eq!(file1_loaded.headings.len(), 1);
1227 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1228 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1229 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1230 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1231
1232 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1234 assert_eq!(dependents.len(), 1);
1235 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1236
1237 let _ = fs::remove_dir_all(&temp_dir);
1239 }
1240
1241 #[cfg(feature = "native")]
1242 #[test]
1243 fn test_cache_missing_file() {
1244 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1245 let _ = std::fs::remove_dir_all(&temp_dir);
1246
1247 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1249 assert!(result.is_none());
1250 }
1251
1252 #[cfg(feature = "native")]
1253 #[test]
1254 fn test_cache_corrupted_file() {
1255 use std::fs;
1256
1257 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1258 let _ = fs::remove_dir_all(&temp_dir);
1259 fs::create_dir_all(&temp_dir).unwrap();
1260
1261 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1263
1264 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1266 assert!(result.is_none());
1267
1268 assert!(!temp_dir.join("workspace_index.bin").exists());
1270
1271 let _ = fs::remove_dir_all(&temp_dir);
1273 }
1274
1275 #[cfg(feature = "native")]
1276 #[test]
1277 fn test_cache_invalid_magic() {
1278 use std::fs;
1279
1280 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1281 let _ = fs::remove_dir_all(&temp_dir);
1282 fs::create_dir_all(&temp_dir).unwrap();
1283
1284 let mut data = Vec::new();
1286 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1290
1291 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1293 assert!(result.is_none());
1294
1295 assert!(!temp_dir.join("workspace_index.bin").exists());
1297
1298 let _ = fs::remove_dir_all(&temp_dir);
1300 }
1301
1302 #[cfg(feature = "native")]
1303 #[test]
1304 fn test_cache_version_mismatch() {
1305 use std::fs;
1306
1307 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1308 let _ = fs::remove_dir_all(&temp_dir);
1309 fs::create_dir_all(&temp_dir).unwrap();
1310
1311 let mut data = Vec::new();
1313 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1317
1318 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1320 assert!(result.is_none());
1321
1322 assert!(!temp_dir.join("workspace_index.bin").exists());
1324
1325 let _ = fs::remove_dir_all(&temp_dir);
1327 }
1328
1329 #[cfg(feature = "native")]
1330 #[test]
1331 fn test_cache_atomic_write() {
1332 use std::fs;
1333
1334 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1336 let _ = fs::remove_dir_all(&temp_dir);
1337 fs::create_dir_all(&temp_dir).unwrap();
1338
1339 let index = WorkspaceIndex::new();
1340 index.save_to_cache(&temp_dir).expect("Failed to save");
1341
1342 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1344 assert_eq!(entries.len(), 1);
1345 assert!(temp_dir.join("workspace_index.bin").exists());
1346
1347 let _ = fs::remove_dir_all(&temp_dir);
1349 }
1350
1351 #[test]
1352 fn test_has_anchor_auto_generated() {
1353 let mut file_index = FileIndex::new();
1354 file_index.add_heading(HeadingIndex {
1355 text: "Installation Guide".to_string(),
1356 auto_anchor: "installation-guide".to_string(),
1357 custom_anchor: None,
1358 line: 1,
1359 is_setext: false,
1360 });
1361
1362 assert!(file_index.has_anchor("installation-guide"));
1364
1365 assert!(file_index.has_anchor("Installation-Guide"));
1367 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1368
1369 assert!(!file_index.has_anchor("nonexistent"));
1371 }
1372
1373 #[test]
1374 fn test_has_anchor_custom() {
1375 let mut file_index = FileIndex::new();
1376 file_index.add_heading(HeadingIndex {
1377 text: "Installation Guide".to_string(),
1378 auto_anchor: "installation-guide".to_string(),
1379 custom_anchor: Some("install".to_string()),
1380 line: 1,
1381 is_setext: false,
1382 });
1383
1384 assert!(file_index.has_anchor("installation-guide"));
1386
1387 assert!(file_index.has_anchor("install"));
1389 assert!(file_index.has_anchor("Install")); assert!(!file_index.has_anchor("nonexistent"));
1393 }
1394
1395 #[test]
1396 fn test_get_heading_by_anchor() {
1397 let mut file_index = FileIndex::new();
1398 file_index.add_heading(HeadingIndex {
1399 text: "Installation Guide".to_string(),
1400 auto_anchor: "installation-guide".to_string(),
1401 custom_anchor: Some("install".to_string()),
1402 line: 10,
1403 is_setext: false,
1404 });
1405 file_index.add_heading(HeadingIndex {
1406 text: "Configuration".to_string(),
1407 auto_anchor: "configuration".to_string(),
1408 custom_anchor: None,
1409 line: 20,
1410 is_setext: false,
1411 });
1412
1413 let heading = file_index.get_heading_by_anchor("installation-guide");
1415 assert!(heading.is_some());
1416 assert_eq!(heading.unwrap().text, "Installation Guide");
1417 assert_eq!(heading.unwrap().line, 10);
1418
1419 let heading = file_index.get_heading_by_anchor("install");
1421 assert!(heading.is_some());
1422 assert_eq!(heading.unwrap().text, "Installation Guide");
1423
1424 let heading = file_index.get_heading_by_anchor("configuration");
1426 assert!(heading.is_some());
1427 assert_eq!(heading.unwrap().text, "Configuration");
1428 assert_eq!(heading.unwrap().line, 20);
1429
1430 assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
1432 }
1433
1434 #[test]
1435 fn test_anchor_lookup_many_headings() {
1436 let mut file_index = FileIndex::new();
1438
1439 for i in 0..100 {
1441 file_index.add_heading(HeadingIndex {
1442 text: format!("Heading {i}"),
1443 auto_anchor: format!("heading-{i}"),
1444 custom_anchor: Some(format!("h{i}")),
1445 line: i + 1,
1446 is_setext: false,
1447 });
1448 }
1449
1450 for i in 0..100 {
1452 assert!(file_index.has_anchor(&format!("heading-{i}")));
1453 assert!(file_index.has_anchor(&format!("h{i}")));
1454
1455 let heading = file_index.get_heading_by_anchor(&format!("heading-{i}"));
1456 assert!(heading.is_some());
1457 assert_eq!(heading.unwrap().line, i + 1);
1458 }
1459 }
1460
1461 #[test]
1466 fn test_extract_cross_file_links_basic() {
1467 use crate::config::MarkdownFlavor;
1468
1469 let content = "# Test\n\nSee [link](./other.md) for info.\n";
1470 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1471 let links = extract_cross_file_links(&ctx);
1472
1473 assert_eq!(links.len(), 1);
1474 assert_eq!(links[0].target_path, "./other.md");
1475 assert_eq!(links[0].fragment, "");
1476 assert_eq!(links[0].line, 3);
1477 assert_eq!(links[0].column, 12);
1479 }
1480
1481 #[test]
1482 fn test_extract_cross_file_links_with_fragment() {
1483 use crate::config::MarkdownFlavor;
1484
1485 let content = "Check [guide](./guide.md#install) here.\n";
1486 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1487 let links = extract_cross_file_links(&ctx);
1488
1489 assert_eq!(links.len(), 1);
1490 assert_eq!(links[0].target_path, "./guide.md");
1491 assert_eq!(links[0].fragment, "install");
1492 assert_eq!(links[0].line, 1);
1493 assert_eq!(links[0].column, 15);
1495 }
1496
1497 #[test]
1498 fn test_extract_cross_file_links_multiple_on_same_line() {
1499 use crate::config::MarkdownFlavor;
1500
1501 let content = "See [a](a.md) and [b](b.md) here.\n";
1502 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1503 let links = extract_cross_file_links(&ctx);
1504
1505 assert_eq!(links.len(), 2);
1506
1507 assert_eq!(links[0].target_path, "a.md");
1508 assert_eq!(links[0].line, 1);
1509 assert_eq!(links[0].column, 9);
1511
1512 assert_eq!(links[1].target_path, "b.md");
1513 assert_eq!(links[1].line, 1);
1514 assert_eq!(links[1].column, 23);
1516 }
1517
1518 #[test]
1519 fn test_extract_cross_file_links_angle_brackets() {
1520 use crate::config::MarkdownFlavor;
1521
1522 let content = "See [link](<path/with (parens).md>) here.\n";
1523 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1524 let links = extract_cross_file_links(&ctx);
1525
1526 assert_eq!(links.len(), 1);
1527 assert_eq!(links[0].target_path, "path/with (parens).md");
1528 assert_eq!(links[0].line, 1);
1529 assert_eq!(links[0].column, 13);
1531 }
1532
1533 #[test]
1534 fn test_extract_cross_file_links_skips_external() {
1535 use crate::config::MarkdownFlavor;
1536
1537 let content = r#"
1538[external](https://example.com)
1539[mailto](mailto:test@example.com)
1540[local](./local.md)
1541[fragment](#section)
1542[absolute](/docs/page.md)
1543"#;
1544 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1545 let links = extract_cross_file_links(&ctx);
1546
1547 assert_eq!(links.len(), 1);
1549 assert_eq!(links[0].target_path, "./local.md");
1550 }
1551
1552 #[test]
1553 fn test_extract_cross_file_links_skips_non_markdown() {
1554 use crate::config::MarkdownFlavor;
1555
1556 let content = r#"
1557[image](./photo.png)
1558[doc](./readme.md)
1559[pdf](./document.pdf)
1560"#;
1561 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1562 let links = extract_cross_file_links(&ctx);
1563
1564 assert_eq!(links.len(), 1);
1566 assert_eq!(links[0].target_path, "./readme.md");
1567 }
1568
1569 #[test]
1570 fn test_extract_cross_file_links_skips_code_spans() {
1571 use crate::config::MarkdownFlavor;
1572
1573 let content = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
1574 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1575 let links = extract_cross_file_links(&ctx);
1576
1577 assert_eq!(links.len(), 1);
1579 assert_eq!(links[0].target_path, "./file.md");
1580 }
1581
1582 #[test]
1583 fn test_extract_cross_file_links_with_query_params() {
1584 use crate::config::MarkdownFlavor;
1585
1586 let content = "See [doc](./file.md?raw=true) here.\n";
1587 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1588 let links = extract_cross_file_links(&ctx);
1589
1590 assert_eq!(links.len(), 1);
1591 assert_eq!(links[0].target_path, "./file.md");
1593 }
1594
1595 #[test]
1596 fn test_extract_cross_file_links_empty_content() {
1597 use crate::config::MarkdownFlavor;
1598
1599 let content = "";
1600 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1601 let links = extract_cross_file_links(&ctx);
1602
1603 assert!(links.is_empty());
1604 }
1605
1606 #[test]
1607 fn test_extract_cross_file_links_no_links() {
1608 use crate::config::MarkdownFlavor;
1609
1610 let content = "# Just a heading\n\nSome text without links.\n";
1611 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1612 let links = extract_cross_file_links(&ctx);
1613
1614 assert!(links.is_empty());
1615 }
1616
1617 #[test]
1618 fn test_extract_cross_file_links_position_accuracy_issue_234() {
1619 use crate::config::MarkdownFlavor;
1622
1623 let content = r#"# Test Document
1624
1625Here is a [broken link](nonexistent-file.md) that should trigger MD057.
1626
1627And another [link](also-missing.md) on this line.
1628"#;
1629 let ctx = LintContext::new(content, MarkdownFlavor::default(), None);
1630 let links = extract_cross_file_links(&ctx);
1631
1632 assert_eq!(links.len(), 2);
1633
1634 assert_eq!(links[0].target_path, "nonexistent-file.md");
1636 assert_eq!(links[0].line, 3);
1637 assert_eq!(links[0].column, 25);
1638
1639 assert_eq!(links[1].target_path, "also-missing.md");
1641 assert_eq!(links[1].line, 5);
1642 assert_eq!(links[1].column, 20);
1643 }
1644}