1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28
/// Convert one ASCII hexadecimal digit (`0-9`, `a-f`, `A-F`) into its numeric value.
///
/// Returns `None` for every byte that is not a hexadecimal digit.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    if c.is_ascii_digit() {
        return Some(c - b'0');
    }
    if (b'a'..=b'f').contains(&c) {
        return Some(c - b'a' + 10);
    }
    if (b'A'..=b'F').contains(&c) {
        return Some(c - b'A' + 10);
    }
    None
}
42
43fn url_decode(s: &str) -> String {
47 if !s.contains('%') {
49 return s.to_string();
50 }
51
52 let bytes = s.as_bytes();
53 let mut result = Vec::with_capacity(bytes.len());
54 let mut i = 0;
55
56 while i < bytes.len() {
57 if bytes[i] == b'%' && i + 2 < bytes.len() {
58 let hex1 = bytes[i + 1];
60 let hex2 = bytes[i + 2];
61 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
62 result.push(d1 * 16 + d2);
63 i += 3;
64 continue;
65 }
66 }
67 result.push(bytes[i]);
68 i += 1;
69 }
70
71 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
73}
74
/// Matches the bracketed text of an inline link or image: an optional `!`
/// followed by `[...]` with no `]` inside the brackets.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());

/// Matches an angle-bracketed link destination, `](<target>)`.
///
/// Group 1 is the text between `<` and `>`; group 2 is an optional `#fragment`
/// that follows the closing `>`. An optional double-quoted title may precede
/// the closing `)`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches a plain link destination, `](target)`.
///
/// Group 1 is the target up to the first `>`, `)`, whitespace or `#`; group 2
/// is an optional `#fragment`. An optional double-quoted title may precede the
/// closing `)`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());

/// Matches strings that begin with `scheme://`, `scheme:` or `www.`.
pub(crate) static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
99
/// File-name suffixes (lowercase, including the leading dot) that identify a
/// Markdown file.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
112
113#[inline]
115fn is_markdown_file(path: &str) -> bool {
116 let path_lower = path.to_lowercase();
117 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
118}
119
/// Return the part of `url` that precedes the first `?` or `#`.
///
/// The whole input is returned when it contains neither character.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|c: char| c == '?' || c == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
133
134pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
142 let content = ctx.content;
143
144 if content.is_empty() || !content.contains("](") {
146 return Vec::new();
147 }
148
149 let mut links = Vec::new();
150 let lines: Vec<&str> = content.lines().collect();
151 let line_index = &ctx.line_index;
152
153 let mut processed_lines = HashSet::new();
156
157 for link in &ctx.links {
158 let line_idx = link.line - 1;
159 if line_idx >= lines.len() {
160 continue;
161 }
162
163 if !processed_lines.insert(line_idx) {
165 continue;
166 }
167
168 let line = lines[line_idx];
169 if !line.contains("](") {
170 continue;
171 }
172
173 for link_match in LINK_START_REGEX.find_iter(line) {
175 let start_pos = link_match.start();
176 let end_pos = link_match.end();
177
178 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
180 let absolute_start_pos = line_start_byte + start_pos;
181
182 if ctx.is_in_code_span_byte(absolute_start_pos) {
184 continue;
185 }
186
187 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
190 .captures_at(line, end_pos - 1)
191 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
192
193 if let Some(caps) = caps_result
194 && let Some(url_group) = caps.get(1)
195 {
196 let file_path = url_group.as_str().trim();
197
198 if file_path.is_empty()
201 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
202 || file_path.starts_with("www.")
203 || file_path.starts_with('#')
204 || file_path.starts_with("{{")
205 || file_path.starts_with("{%")
206 || file_path.starts_with('/')
207 || file_path.starts_with('~')
208 || file_path.starts_with('@')
209 || (file_path.starts_with('`') && file_path.ends_with('`'))
210 {
211 continue;
212 }
213
214 let file_path = strip_query_and_fragment(file_path);
216
217 let fragment = caps.get(2).map_or("", |m| m.as_str().trim_start_matches('#'));
219
220 if is_markdown_file(file_path) {
222 links.push(CrossFileLinkIndex {
223 target_path: file_path.to_string(),
224 fragment: fragment.to_string(),
225 line: link.line,
226 column: url_group.start() + 1,
227 });
228 }
229 }
230 }
231 }
232
233 links
234}
235
/// Four-byte magic header written at the start of the cache file and checked
/// when loading it.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";

/// Cache format version, stored little-endian right after the magic header.
/// A cache file with a different version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 6;

/// Name of the cache file inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
247
/// Index of all files in a workspace plus the link dependencies between them.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Reverse dependency map: link target path -> set of files that link to it.
    /// Populated by `update_file`.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Modification counter, bumped (with wrap-around) whenever the index changes.
    version: u64,
}
262
/// Index data extracted from a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings of the file, in the order they were added.
    pub headings: Vec<HeadingIndex>,
    /// Reference-style link usages recorded for the file.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links from this file to other files.
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference identifiers defined in the file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content this index was built from; compared verbatim
    /// by `hash_matches` and `WorkspaceIndex::is_file_stale`.
    pub content_hash: String,
    /// Lowercased anchor -> position in `headings` (case-insensitive lookup).
    anchor_to_heading: HashMap<String, usize>,
    /// Anchor exactly as written -> position in `headings` (case-sensitive lookup).
    #[serde(default)]
    anchor_to_heading_exact: HashMap<String, usize>,
    /// Lowercased anchors registered through `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// The same anchors with their original casing.
    #[serde(default)]
    html_anchors_exact: HashSet<String>,
    /// Lowercased anchors registered through `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// The same anchors with their original casing.
    #[serde(default)]
    attribute_anchors_exact: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` disables every rule.
    pub file_disabled_rules: HashSet<String>,
    /// `(line, disabled rules, enabled rules)` transitions consulted by
    /// `is_rule_disabled_at_line`. Looked up with a binary search on the line
    /// number, so the entries are presumably kept sorted by line — TODO confirm
    /// against the code that fills this vector.
    pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
    /// Rules disabled on individual lines, keyed by line number; `"*"` disables
    /// every rule on that line.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
309
/// A heading recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Anchor derived automatically for the heading; may be empty.
    pub auto_anchor: String,
    /// Explicitly assigned anchor, if the heading has one.
    pub custom_anchor: Option<String>,
    /// Line number of the heading.
    pub line: usize,
    /// Whether the heading is a Setext heading (judging by the field name —
    /// not read anywhere in the visible part of this file).
    #[serde(default)]
    pub is_setext: bool,
}
325
/// A reference-style link usage recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// Identifier of the reference the link uses.
    pub reference_id: String,
    /// Line number of the link.
    pub line: usize,
    /// Column of the link.
    pub column: usize,
}
336
/// A link from one file to another file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Link target as written in the source, without query string or fragment.
    /// Resolved relative to the linking file's directory by
    /// `WorkspaceIndex::update_file`.
    pub target_path: String,
    /// Fragment of the link without the leading `#`; empty when there is none.
    pub fragment: String,
    /// 1-based line number of the link.
    pub line: usize,
    /// 1-based column of the link target within its line.
    pub column: usize,
}
349
/// A heading that has only an auto-generated anchor (no custom anchor), as
/// reported by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File that contains the heading.
    pub file: PathBuf,
    /// Line number of the heading.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
360
361impl WorkspaceIndex {
362 pub fn new() -> Self {
364 Self::default()
365 }
366
367 pub fn version(&self) -> u64 {
369 self.version
370 }
371
372 pub fn file_count(&self) -> usize {
374 self.files.len()
375 }
376
377 pub fn contains_file(&self, path: &Path) -> bool {
379 self.files.contains_key(path)
380 }
381
382 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
384 self.files.get(path)
385 }
386
387 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
389 self.files.insert(path, index);
390 self.version = self.version.wrapping_add(1);
391 }
392
393 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
395 self.clear_reverse_deps_for(path);
397
398 let result = self.files.remove(path);
399 if result.is_some() {
400 self.version = self.version.wrapping_add(1);
401 }
402 result
403 }
404
405 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
415 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
416
417 for (file_path, file_index) in &self.files {
418 for heading in &file_index.headings {
419 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
421 let anchor_key = heading.auto_anchor.to_lowercase();
422 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
423 file: file_path.clone(),
424 line: heading.line,
425 text: heading.text.clone(),
426 });
427 }
428 }
429 }
430
431 vulnerable
432 }
433
434 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
436 self.files
437 .iter()
438 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
439 }
440
441 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
443 self.files.iter().map(|(p, i)| (p.as_path(), i))
444 }
445
446 pub fn clear(&mut self) {
448 self.files.clear();
449 self.reverse_deps.clear();
450 self.version = self.version.wrapping_add(1);
451 }
452
453 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
460 self.clear_reverse_deps_as_source(path);
463
464 for link in &index.cross_file_links {
466 let target = self.resolve_target_path(path, &link.target_path);
467 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
468 }
469
470 self.files.insert(path.to_path_buf(), index);
471 self.version = self.version.wrapping_add(1);
472 }
473
474 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
479 self.reverse_deps
480 .get(path)
481 .map(|set| set.iter().cloned().collect())
482 .unwrap_or_default()
483 }
484
485 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
489 self.files.get(path).is_none_or(|f| f.content_hash != current_hash)
490 }
491
492 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
497 let before_count = self.files.len();
498
499 let to_remove: Vec<PathBuf> = self
501 .files
502 .keys()
503 .filter(|path| !current_files.contains(*path))
504 .cloned()
505 .collect();
506
507 for path in &to_remove {
509 self.remove_file(path);
510 }
511
512 before_count - self.files.len()
513 }
514
515 #[cfg(feature = "native")]
522 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
523 use std::fs;
524 use std::io::Write;
525
526 fs::create_dir_all(cache_dir)?;
528
529 let encoded = postcard::to_allocvec(self)
531 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
532
533 let mut cache_data = Vec::with_capacity(8 + encoded.len());
535 cache_data.extend_from_slice(CACHE_MAGIC);
536 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
537 cache_data.extend_from_slice(&encoded);
538
539 let final_path = cache_dir.join(CACHE_FILE_NAME);
541 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
542
543 {
545 let mut file = fs::File::create(&temp_path)?;
546 file.write_all(&cache_data)?;
547 file.sync_all()?;
548 }
549
550 fs::rename(&temp_path, &final_path)?;
552
553 log::debug!(
554 "Saved workspace index to cache: {} files, {} bytes (format v{})",
555 self.files.len(),
556 cache_data.len(),
557 CACHE_FORMAT_VERSION
558 );
559
560 Ok(())
561 }
562
563 #[cfg(feature = "native")]
571 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
572 use std::fs;
573
574 let path = cache_dir.join(CACHE_FILE_NAME);
575 let data = fs::read(&path).ok()?;
576
577 if data.len() < 8 {
579 log::warn!("Workspace index cache too small, discarding");
580 let _ = fs::remove_file(&path);
581 return None;
582 }
583
584 if &data[0..4] != CACHE_MAGIC {
586 log::warn!("Workspace index cache has invalid magic header, discarding");
587 let _ = fs::remove_file(&path);
588 return None;
589 }
590
591 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
593 if version != CACHE_FORMAT_VERSION {
594 log::info!(
595 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
596 );
597 let _ = fs::remove_file(&path);
598 return None;
599 }
600
601 match postcard::from_bytes::<Self>(&data[8..]) {
603 Ok(index) => {
604 log::debug!(
605 "Loaded workspace index from cache: {} files (format v{})",
606 index.files.len(),
607 version
608 );
609 Some(index)
610 }
611 Err(e) => {
612 log::warn!("Failed to deserialize workspace index cache: {e}");
613 let _ = fs::remove_file(&path);
614 None
615 }
616 }
617 }
618
619 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
624 for deps in self.reverse_deps.values_mut() {
625 deps.remove(path);
626 }
627 self.reverse_deps.retain(|_, deps| !deps.is_empty());
629 }
630
631 fn clear_reverse_deps_for(&mut self, path: &Path) {
636 self.clear_reverse_deps_as_source(path);
638
639 self.reverse_deps.remove(path);
641 }
642
643 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
645 let source_dir = source_file.parent().unwrap_or(Path::new(""));
647
648 let target = source_dir.join(relative_target);
650
651 Self::normalize_path(&target)
653 }
654
655 fn normalize_path(path: &Path) -> PathBuf {
657 let mut components = Vec::new();
658
659 for component in path.components() {
660 match component {
661 std::path::Component::ParentDir => {
662 if !components.is_empty() {
664 components.pop();
665 }
666 }
667 std::path::Component::CurDir => {
668 }
670 _ => {
671 components.push(component);
672 }
673 }
674 }
675
676 components.iter().collect()
677 }
678}
679
680impl FileIndex {
681 pub fn new() -> Self {
683 Self::default()
684 }
685
686 pub fn with_hash(content_hash: String) -> Self {
688 Self {
689 content_hash,
690 ..Default::default()
691 }
692 }
693
694 pub fn add_heading(&mut self, heading: HeadingIndex) {
700 let index = self.headings.len();
701
702 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
705 self.anchor_to_heading_exact.insert(heading.auto_anchor.clone(), index);
706
707 if let Some(ref custom) = heading.custom_anchor {
709 self.anchor_to_heading.insert(custom.to_lowercase(), index);
710 self.anchor_to_heading_exact.insert(custom.clone(), index);
711 }
712
713 self.headings.push(heading);
714 }
715
716 pub fn add_anchor_alias(&mut self, anchor: &str, heading_index: usize) {
719 if heading_index < self.headings.len() {
720 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
721 self.anchor_to_heading_exact.insert(anchor.to_string(), heading_index);
722 }
723 }
724
725 pub fn has_anchor(&self, anchor: &str) -> bool {
736 self.has_anchor_with_case(anchor, true)
737 }
738
739 pub fn has_anchor_with_case(&self, anchor: &str, ignore_case: bool) -> bool {
748 if self.lookup_anchor(anchor, ignore_case) {
749 return true;
750 }
751
752 if anchor.contains('%') {
754 let decoded = url_decode(anchor);
755 if decoded != anchor {
756 return self.lookup_anchor(&decoded, ignore_case);
757 }
758 }
759
760 false
761 }
762
763 fn lookup_anchor(&self, anchor: &str, ignore_case: bool) -> bool {
766 if ignore_case {
767 let lower = anchor.to_lowercase();
768 self.anchor_to_heading.contains_key(&lower)
769 || self.html_anchors.contains(&lower)
770 || self.attribute_anchors.contains(&lower)
771 } else {
772 self.anchor_to_heading_exact.contains_key(anchor)
773 || self.html_anchors_exact.contains(anchor)
774 || self.attribute_anchors_exact.contains(anchor)
775 }
776 }
777
778 pub fn add_html_anchor(&mut self, anchor: &str) {
781 if !anchor.is_empty() {
782 self.html_anchors.insert(anchor.to_lowercase());
783 self.html_anchors_exact.insert(anchor.to_string());
784 }
785 }
786
787 pub fn add_attribute_anchor(&mut self, anchor: &str) {
790 if !anchor.is_empty() {
791 self.attribute_anchors.insert(anchor.to_lowercase());
792 self.attribute_anchors_exact.insert(anchor.to_string());
793 }
794 }
795
796 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
800 self.anchor_to_heading
801 .get(&anchor.to_lowercase())
802 .and_then(|&idx| self.headings.get(idx))
803 }
804
805 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
807 self.reference_links.push(link);
808 }
809
810 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
815 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
817 return true;
818 }
819
820 if let Some(rules) = self.line_disabled_rules.get(&line)
822 && (rules.contains("*") || rules.contains(rule_name))
823 {
824 return true;
825 }
826
827 if !self.persistent_transitions.is_empty() {
829 let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
830 Ok(i) => Some(i),
831 Err(i) => {
832 if i > 0 {
833 Some(i - 1)
834 } else {
835 None
836 }
837 }
838 };
839 if let Some(i) = idx {
840 let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
841 if disabled.contains("*") {
842 return !enabled.contains(rule_name);
843 }
844 return disabled.contains(rule_name);
845 }
846 }
847
848 false
849 }
850
851 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
853 let is_duplicate = self.cross_file_links.iter().any(|existing| {
856 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
857 });
858 if !is_duplicate {
859 self.cross_file_links.push(link);
860 }
861 }
862
863 pub fn add_defined_reference(&mut self, ref_id: String) {
865 self.defined_references.insert(ref_id);
866 }
867
868 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
870 self.defined_references.contains(ref_id)
871 }
872
873 pub fn hash_matches(&self, hash: &str) -> bool {
875 self.content_hash == hash
876 }
877
878 pub fn heading_count(&self) -> usize {
880 self.headings.len()
881 }
882
883 pub fn reference_link_count(&self) -> usize {
885 self.reference_links.len()
886 }
887}
888
889#[cfg(test)]
890mod tests {
891 use super::*;
892
893 #[test]
894 fn test_workspace_index_basic() {
895 let mut index = WorkspaceIndex::new();
896 assert_eq!(index.file_count(), 0);
897 assert_eq!(index.version(), 0);
898
899 let mut file_index = FileIndex::with_hash("abc123".to_string());
900 file_index.add_heading(HeadingIndex {
901 text: "Installation".to_string(),
902 auto_anchor: "installation".to_string(),
903 custom_anchor: None,
904 line: 1,
905 is_setext: false,
906 });
907
908 index.insert_file(PathBuf::from("docs/install.md"), file_index);
909 assert_eq!(index.file_count(), 1);
910 assert_eq!(index.version(), 1);
911
912 assert!(index.contains_file(Path::new("docs/install.md")));
913 assert!(!index.contains_file(Path::new("docs/other.md")));
914 }
915
916 #[test]
917 fn test_vulnerable_anchors() {
918 let mut index = WorkspaceIndex::new();
919
920 let mut file1 = FileIndex::new();
922 file1.add_heading(HeadingIndex {
923 text: "Getting Started".to_string(),
924 auto_anchor: "getting-started".to_string(),
925 custom_anchor: None,
926 line: 1,
927 is_setext: false,
928 });
929 index.insert_file(PathBuf::from("docs/guide.md"), file1);
930
931 let mut file2 = FileIndex::new();
933 file2.add_heading(HeadingIndex {
934 text: "Installation".to_string(),
935 auto_anchor: "installation".to_string(),
936 custom_anchor: Some("install".to_string()),
937 line: 1,
938 is_setext: false,
939 });
940 index.insert_file(PathBuf::from("docs/install.md"), file2);
941
942 let vulnerable = index.get_vulnerable_anchors();
943 assert_eq!(vulnerable.len(), 1);
944 assert!(vulnerable.contains_key("getting-started"));
945 assert!(!vulnerable.contains_key("installation"));
946
947 let anchors = vulnerable.get("getting-started").unwrap();
948 assert_eq!(anchors.len(), 1);
949 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
950 assert_eq!(anchors[0].text, "Getting Started");
951 }
952
953 #[test]
954 fn test_vulnerable_anchors_multiple_files_same_anchor() {
955 let mut index = WorkspaceIndex::new();
958
959 let mut file1 = FileIndex::new();
961 file1.add_heading(HeadingIndex {
962 text: "Installation".to_string(),
963 auto_anchor: "installation".to_string(),
964 custom_anchor: None,
965 line: 1,
966 is_setext: false,
967 });
968 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
969
970 let mut file2 = FileIndex::new();
972 file2.add_heading(HeadingIndex {
973 text: "Installation".to_string(),
974 auto_anchor: "installation".to_string(),
975 custom_anchor: None,
976 line: 5,
977 is_setext: false,
978 });
979 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
980
981 let mut file3 = FileIndex::new();
983 file3.add_heading(HeadingIndex {
984 text: "Installation".to_string(),
985 auto_anchor: "installation".to_string(),
986 custom_anchor: Some("install".to_string()),
987 line: 10,
988 is_setext: false,
989 });
990 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
991
992 let vulnerable = index.get_vulnerable_anchors();
993 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
995
996 let anchors = vulnerable.get("installation").unwrap();
997 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
999
1000 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
1002 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
1003 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
1004 }
1005
1006 #[test]
1007 fn test_file_index_hash() {
1008 let index = FileIndex::with_hash("hash123".to_string());
1009 assert!(index.hash_matches("hash123"));
1010 assert!(!index.hash_matches("other"));
1011 }
1012
1013 #[test]
1014 fn test_version_increment() {
1015 let mut index = WorkspaceIndex::new();
1016 assert_eq!(index.version(), 0);
1017
1018 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
1019 assert_eq!(index.version(), 1);
1020
1021 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
1022 assert_eq!(index.version(), 2);
1023
1024 index.remove_file(Path::new("a.md"));
1025 assert_eq!(index.version(), 3);
1026
1027 index.remove_file(Path::new("nonexistent.md"));
1029 assert_eq!(index.version(), 3);
1030 }
1031
1032 #[test]
1033 fn test_reverse_deps_basic() {
1034 let mut index = WorkspaceIndex::new();
1035
1036 let mut file_a = FileIndex::new();
1038 file_a.add_cross_file_link(CrossFileLinkIndex {
1039 target_path: "b.md".to_string(),
1040 fragment: "section".to_string(),
1041 line: 10,
1042 column: 5,
1043 });
1044 index.update_file(Path::new("docs/a.md"), file_a);
1045
1046 let dependents = index.get_dependents(Path::new("docs/b.md"));
1048 assert_eq!(dependents.len(), 1);
1049 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
1050
1051 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
1053 assert!(a_dependents.is_empty());
1054 }
1055
1056 #[test]
1057 fn test_reverse_deps_multiple() {
1058 let mut index = WorkspaceIndex::new();
1059
1060 let mut file_a = FileIndex::new();
1062 file_a.add_cross_file_link(CrossFileLinkIndex {
1063 target_path: "../b.md".to_string(),
1064 fragment: "".to_string(),
1065 line: 1,
1066 column: 1,
1067 });
1068 index.update_file(Path::new("docs/sub/a.md"), file_a);
1069
1070 let mut file_c = FileIndex::new();
1071 file_c.add_cross_file_link(CrossFileLinkIndex {
1072 target_path: "b.md".to_string(),
1073 fragment: "".to_string(),
1074 line: 1,
1075 column: 1,
1076 });
1077 index.update_file(Path::new("docs/c.md"), file_c);
1078
1079 let dependents = index.get_dependents(Path::new("docs/b.md"));
1081 assert_eq!(dependents.len(), 2);
1082 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1083 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1084 }
1085
1086 #[test]
1087 fn test_reverse_deps_update_clears_old() {
1088 let mut index = WorkspaceIndex::new();
1089
1090 let mut file_a = FileIndex::new();
1092 file_a.add_cross_file_link(CrossFileLinkIndex {
1093 target_path: "b.md".to_string(),
1094 fragment: "".to_string(),
1095 line: 1,
1096 column: 1,
1097 });
1098 index.update_file(Path::new("docs/a.md"), file_a);
1099
1100 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1102
1103 let mut file_a_updated = FileIndex::new();
1105 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1106 target_path: "c.md".to_string(),
1107 fragment: "".to_string(),
1108 line: 1,
1109 column: 1,
1110 });
1111 index.update_file(Path::new("docs/a.md"), file_a_updated);
1112
1113 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1115
1116 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1118 assert_eq!(c_deps.len(), 1);
1119 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1120 }
1121
1122 #[test]
1123 fn test_reverse_deps_remove_file() {
1124 let mut index = WorkspaceIndex::new();
1125
1126 let mut file_a = FileIndex::new();
1128 file_a.add_cross_file_link(CrossFileLinkIndex {
1129 target_path: "b.md".to_string(),
1130 fragment: "".to_string(),
1131 line: 1,
1132 column: 1,
1133 });
1134 index.update_file(Path::new("docs/a.md"), file_a);
1135
1136 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1138
1139 index.remove_file(Path::new("docs/a.md"));
1141
1142 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1144 }
1145
1146 #[test]
1147 fn test_normalize_path() {
1148 let path = Path::new("docs/sub/../other.md");
1150 let normalized = WorkspaceIndex::normalize_path(path);
1151 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1152
1153 let path2 = Path::new("docs/./other.md");
1155 let normalized2 = WorkspaceIndex::normalize_path(path2);
1156 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1157
1158 let path3 = Path::new("a/b/c/../../d.md");
1160 let normalized3 = WorkspaceIndex::normalize_path(path3);
1161 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1162 }
1163
1164 #[test]
1165 fn test_clear_clears_reverse_deps() {
1166 let mut index = WorkspaceIndex::new();
1167
1168 let mut file_a = FileIndex::new();
1170 file_a.add_cross_file_link(CrossFileLinkIndex {
1171 target_path: "b.md".to_string(),
1172 fragment: "".to_string(),
1173 line: 1,
1174 column: 1,
1175 });
1176 index.update_file(Path::new("docs/a.md"), file_a);
1177
1178 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1180
1181 index.clear();
1183
1184 assert_eq!(index.file_count(), 0);
1186 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1187 }
1188
1189 #[test]
1190 fn test_is_file_stale() {
1191 let mut index = WorkspaceIndex::new();
1192
1193 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1195
1196 let file_index = FileIndex::with_hash("hash123".to_string());
1198 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1199
1200 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1202
1203 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1205 }
1206
1207 #[cfg(feature = "native")]
1208 #[test]
1209 fn test_cache_roundtrip() {
1210 use std::fs;
1211
1212 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1214 let _ = fs::remove_dir_all(&temp_dir);
1215 fs::create_dir_all(&temp_dir).unwrap();
1216
1217 let mut index = WorkspaceIndex::new();
1219
1220 let mut file1 = FileIndex::with_hash("abc123".to_string());
1221 file1.add_heading(HeadingIndex {
1222 text: "Test Heading".to_string(),
1223 auto_anchor: "test-heading".to_string(),
1224 custom_anchor: Some("test".to_string()),
1225 line: 1,
1226 is_setext: false,
1227 });
1228 file1.add_cross_file_link(CrossFileLinkIndex {
1229 target_path: "./other.md".to_string(),
1230 fragment: "section".to_string(),
1231 line: 5,
1232 column: 3,
1233 });
1234 index.update_file(Path::new("docs/file1.md"), file1);
1235
1236 let mut file2 = FileIndex::with_hash("def456".to_string());
1237 file2.add_heading(HeadingIndex {
1238 text: "Another Heading".to_string(),
1239 auto_anchor: "another-heading".to_string(),
1240 custom_anchor: None,
1241 line: 1,
1242 is_setext: false,
1243 });
1244 index.update_file(Path::new("docs/other.md"), file2);
1245
1246 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1248
1249 assert!(temp_dir.join("workspace_index.bin").exists());
1251
1252 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1254
1255 assert_eq!(loaded.file_count(), 2);
1257 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1258 assert!(loaded.contains_file(Path::new("docs/other.md")));
1259
1260 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1262 assert_eq!(file1_loaded.content_hash, "abc123");
1263 assert_eq!(file1_loaded.headings.len(), 1);
1264 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1265 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1266 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1267 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1268
1269 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1271 assert_eq!(dependents.len(), 1);
1272 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1273
1274 let _ = fs::remove_dir_all(&temp_dir);
1276 }
1277
1278 #[cfg(feature = "native")]
1279 #[test]
1280 fn test_cache_missing_file() {
1281 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1282 let _ = std::fs::remove_dir_all(&temp_dir);
1283
1284 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1286 assert!(result.is_none());
1287 }
1288
1289 #[cfg(feature = "native")]
1290 #[test]
1291 fn test_cache_corrupted_file() {
1292 use std::fs;
1293
1294 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1295 let _ = fs::remove_dir_all(&temp_dir);
1296 fs::create_dir_all(&temp_dir).unwrap();
1297
1298 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1300
1301 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1303 assert!(result.is_none());
1304
1305 assert!(!temp_dir.join("workspace_index.bin").exists());
1307
1308 let _ = fs::remove_dir_all(&temp_dir);
1310 }
1311
1312 #[cfg(feature = "native")]
1313 #[test]
1314 fn test_cache_invalid_magic() {
1315 use std::fs;
1316
1317 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1318 let _ = fs::remove_dir_all(&temp_dir);
1319 fs::create_dir_all(&temp_dir).unwrap();
1320
1321 let mut data = Vec::new();
1323 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1327
1328 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1330 assert!(result.is_none());
1331
1332 assert!(!temp_dir.join("workspace_index.bin").exists());
1334
1335 let _ = fs::remove_dir_all(&temp_dir);
1337 }
1338
1339 #[cfg(feature = "native")]
1340 #[test]
1341 fn test_cache_version_mismatch() {
1342 use std::fs;
1343
1344 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1345 let _ = fs::remove_dir_all(&temp_dir);
1346 fs::create_dir_all(&temp_dir).unwrap();
1347
1348 let mut data = Vec::new();
1350 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1354
1355 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1357 assert!(result.is_none());
1358
1359 assert!(!temp_dir.join("workspace_index.bin").exists());
1361
1362 let _ = fs::remove_dir_all(&temp_dir);
1364 }
1365
1366 #[cfg(feature = "native")]
1367 #[test]
1368 fn test_cache_atomic_write() {
1369 use std::fs;
1370
1371 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1373 let _ = fs::remove_dir_all(&temp_dir);
1374 fs::create_dir_all(&temp_dir).unwrap();
1375
1376 let index = WorkspaceIndex::new();
1377 index.save_to_cache(&temp_dir).expect("Failed to save");
1378
1379 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1381 assert_eq!(entries.len(), 1);
1382 assert!(temp_dir.join("workspace_index.bin").exists());
1383
1384 let _ = fs::remove_dir_all(&temp_dir);
1386 }
1387
1388 #[test]
1389 fn test_has_anchor_auto_generated() {
1390 let mut file_index = FileIndex::new();
1391 file_index.add_heading(HeadingIndex {
1392 text: "Installation Guide".to_string(),
1393 auto_anchor: "installation-guide".to_string(),
1394 custom_anchor: None,
1395 line: 1,
1396 is_setext: false,
1397 });
1398
1399 assert!(file_index.has_anchor("installation-guide"));
1401
1402 assert!(file_index.has_anchor("Installation-Guide"));
1404 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1405
1406 assert!(!file_index.has_anchor("nonexistent"));
1408 }
1409
/// A heading with a custom anchor is reachable through its auto-generated
/// anchor as well as its custom one; the custom anchor also matches when
/// queried with different letter case.
#[test]
fn test_has_anchor_custom() {
    let heading = HeadingIndex {
        text: String::from("Installation Guide"),
        auto_anchor: String::from("installation-guide"),
        custom_anchor: Some(String::from("install")),
        line: 1,
        is_setext: false,
    };
    let mut index = FileIndex::new();
    index.add_heading(heading);

    // The auto-generated anchor stays valid alongside the custom one.
    assert!(index.has_anchor("installation-guide"));

    // The custom anchor, in its exact and in a capitalised spelling.
    for spelling in ["install", "Install"] {
        assert!(index.has_anchor(spelling));
    }

    assert!(!index.has_anchor("nonexistent"));
}
1431
/// Headings can be retrieved through their auto-generated anchor or their
/// custom anchor, and an unknown anchor yields `None`.
///
/// Each lookup uses `expect` rather than `is_some()` followed by repeated
/// `unwrap()` calls, so a failure names the anchor that did not resolve.
#[test]
fn test_get_heading_by_anchor() {
    let mut file_index = FileIndex::new();
    file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: Some("install".to_string()),
        line: 10,
        is_setext: false,
    });
    file_index.add_heading(HeadingIndex {
        text: "Configuration".to_string(),
        auto_anchor: "configuration".to_string(),
        custom_anchor: None,
        line: 20,
        is_setext: false,
    });

    // Lookup through the auto-generated anchor.
    let heading = file_index
        .get_heading_by_anchor("installation-guide")
        .expect("auto anchor `installation-guide` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");
    assert_eq!(heading.line, 10);

    // Lookup through the custom anchor reaches the same heading.
    let heading = file_index
        .get_heading_by_anchor("install")
        .expect("custom anchor `install` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");

    // Lookup of the second heading, which has no custom anchor.
    let heading = file_index
        .get_heading_by_anchor("configuration")
        .expect("auto anchor `configuration` should resolve to a heading");
    assert_eq!(heading.text, "Configuration");
    assert_eq!(heading.line, 20);

    assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
}
1470
/// Registers 100 headings, each with an auto-generated anchor `heading-{i}`
/// and a custom anchor `h{i}`, then checks that every anchor is known and
/// that the auto-generated anchor resolves to the heading on line `i + 1`.
///
/// Assertions carry the loop index so a failure identifies the heading that
/// broke, instead of a bare `unwrap()` panic.
#[test]
fn test_anchor_lookup_many_headings() {
    let mut file_index = FileIndex::new();

    for i in 0..100 {
        file_index.add_heading(HeadingIndex {
            text: format!("Heading {i}"),
            auto_anchor: format!("heading-{i}"),
            custom_anchor: Some(format!("h{i}")),
            line: i + 1,
            is_setext: false,
        });
    }

    for i in 0..100 {
        // Format each anchor once and reuse it for every check below.
        let auto_anchor = format!("heading-{i}");
        let custom_anchor = format!("h{i}");

        assert!(file_index.has_anchor(&auto_anchor), "missing auto anchor `{auto_anchor}`");
        assert!(file_index.has_anchor(&custom_anchor), "missing custom anchor `{custom_anchor}`");

        let heading = file_index
            .get_heading_by_anchor(&auto_anchor)
            .unwrap_or_else(|| panic!("anchor `{auto_anchor}` should resolve to a heading"));
        assert_eq!(heading.line, i + 1);
    }
}
1497
/// A single relative Markdown link produces one entry with an empty fragment.
/// The expected position is line 3, column 12, which is where the URL text
/// starts when lines and columns are counted from 1.
#[test]
fn test_extract_cross_file_links_basic() {
    let markdown = "# Test\n\nSee [link](./other.md) for info.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./other.md");
    assert_eq!(link.fragment, "");
    assert_eq!(link.line, 3);
    assert_eq!(link.column, 12);
}
1517
/// A link of the form `path#fragment` is split: the path goes to
/// `target_path` and the fragment is stored without its leading `#`.
#[test]
fn test_extract_cross_file_links_with_fragment() {
    let markdown = "Check [guide](./guide.md#install) here.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./guide.md");
    assert_eq!(link.fragment, "install");
    assert_eq!(link.line, 1);
    // Column 15 is where the URL text starts, counting from 1.
    assert_eq!(link.column, 15);
}
1533
/// Two links on one line are both reported, in source order, each with the
/// column at which its own URL starts.
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
    let markdown = "See [a](a.md) and [b](b.md) here.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 2);

    let first = &found[0];
    assert_eq!(first.target_path, "a.md");
    assert_eq!(first.line, 1);
    assert_eq!(first.column, 9);

    let second = &found[1];
    assert_eq!(second.target_path, "b.md");
    assert_eq!(second.line, 1);
    assert_eq!(second.column, 23);
}
1554
/// A destination wrapped in `<...>` may contain spaces and parentheses; the
/// reported path excludes the angle brackets, and the column points at the
/// first character after the opening `<`.
#[test]
fn test_extract_cross_file_links_angle_brackets() {
    let markdown = "See [link](<path/with (parens).md>) here.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "path/with (parens).md");
    assert_eq!(link.line, 1);
    assert_eq!(link.column, 13);
}
1569
/// Of five links (an `https` URL, a `mailto:` address, a relative Markdown
/// file, a fragment-only link and a path starting with `/`), only the
/// relative Markdown file is reported.
#[test]
fn test_extract_cross_file_links_skips_external() {
    // One link per line; the document starts with an empty line.
    let markdown = concat!(
        "\n",
        "[external](https://example.com)\n",
        "[mailto](mailto:test@example.com)\n",
        "[local](./local.md)\n",
        "[fragment](#section)\n",
        "[absolute](/docs/page.md)\n",
    );
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./local.md");
}
1588
/// Links to a `.png` and a `.pdf` file are ignored; only the `.md` target is
/// reported.
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
    // One link per line; the document starts with an empty line.
    let markdown = concat!(
        "\n",
        "[image](./photo.png)\n",
        "[doc](./readme.md)\n",
        "[pdf](./document.pdf)\n",
    );
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./readme.md");
}
1605
/// Link syntax inside a backtick code span is not reported; the ordinary
/// link on the same line is.
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
    let markdown = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./file.md");
}
1618
/// A query string after the file name (`?raw=true`) is not part of the
/// reported target path.
#[test]
fn test_extract_cross_file_links_with_query_params() {
    let markdown = "See [doc](./file.md?raw=true) here.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);
    assert_eq!(found[0].target_path, "./file.md");
}
1631
/// An empty document yields no cross-file links.
#[test]
fn test_extract_cross_file_links_empty_content() {
    let lint_ctx = LintContext::new("", crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert!(found.is_empty());
}
1642
/// A document consisting of a heading and plain text yields no cross-file
/// links.
#[test]
fn test_extract_cross_file_links_no_links() {
    let markdown = "# Just a heading\n\nSome text without links.\n";
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert!(found.is_empty());
}
1653
/// Position check for two links on different lines of a multi-line document:
/// each reported line/column must point at the first character of the link's
/// URL (counted from 1), not at the start of the line or of the link text.
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
    let markdown = concat!(
        "# Test Document\n",
        "\n",
        "Here is a [broken link](nonexistent-file.md) that should trigger MD057.\n",
        "\n",
        "And another [link](also-missing.md) on this line.\n",
    );
    let lint_ctx = LintContext::new(markdown, crate::config::MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 2);

    // Third line of the document; the URL begins at column 25.
    let first = &found[0];
    assert_eq!(first.target_path, "nonexistent-file.md");
    assert_eq!(first.line, 3);
    assert_eq!(first.column, 25);

    // Fifth line of the document; the URL begins at column 20.
    let second = &found[1];
    assert_eq!(second.target_path, "also-missing.md");
    assert_eq!(second.line, 5);
    assert_eq!(second.column, 20);
}
1681}