1use regex::Regex;
22use serde::{Deserialize, Serialize};
23use std::collections::{HashMap, HashSet};
24use std::path::{Path, PathBuf};
25use std::sync::LazyLock;
26
27use crate::lint_context::LintContext;
28use crate::utils::element_cache::ElementCache;
29
/// Converts one ASCII hexadecimal digit into its numeric value (0-15).
///
/// Both lowercase (`a-f`) and uppercase (`A-F`) letters are accepted.
/// Returns `None` for any byte that is not a hexadecimal digit.
fn hex_digit_to_value(c: u8) -> Option<u8> {
    if c.is_ascii_digit() {
        Some(c - b'0')
    } else if (b'a'..=b'f').contains(&c) {
        Some(c - b'a' + 10)
    } else if (b'A'..=b'F').contains(&c) {
        Some(c - b'A' + 10)
    } else {
        None
    }
}
43
44fn url_decode(s: &str) -> String {
48 if !s.contains('%') {
50 return s.to_string();
51 }
52
53 let bytes = s.as_bytes();
54 let mut result = Vec::with_capacity(bytes.len());
55 let mut i = 0;
56
57 while i < bytes.len() {
58 if bytes[i] == b'%' && i + 2 < bytes.len() {
59 let hex1 = bytes[i + 1];
61 let hex2 = bytes[i + 2];
62 if let (Some(d1), Some(d2)) = (hex_digit_to_value(hex1), hex_digit_to_value(hex2)) {
63 result.push(d1 * 16 + d2);
64 i += 3;
65 continue;
66 }
67 }
68 result.push(bytes[i]);
69 i += 1;
70 }
71
72 String::from_utf8(result).unwrap_or_else(|_| s.to_string())
74}
75
/// Matches the bracketed text part of a link or image: an optional `!`
/// followed by `[...]` whose content contains no `]`.
static LINK_START_REGEX: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"!?\[[^\]]*\]").unwrap());
86
/// Matches a link destination written in angle brackets: `](<url>)`, with an
/// optional `#fragment` directly after the `>` and an optional double-quoted
/// title.
///
/// Group 1 is the text between `<` and `>`; group 2 (if present) is the
/// fragment including its leading `#`.
static URL_EXTRACT_ANGLE_BRACKET_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"\]\(\s*<([^>]+)>(#[^\)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
91
/// Matches a plain link destination: `](url)`, with an optional `#fragment`
/// and an optional double-quoted title.
///
/// Group 1 is the URL up to the first whitespace, `)`, `>` or `#`; group 2
/// (if present) is the fragment including its leading `#`.
static URL_EXTRACT_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r#"]\(\s*([^>)\s#]+)(#[^)\s]*)?\s*(?:"[^"]*")?\s*\)"#).unwrap());
96
/// Matches strings that start with `scheme://`, `scheme:` or `www.`.
/// Used to recognise targets that are not local file paths.
static PROTOCOL_DOMAIN_REGEX: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"^([a-zA-Z][a-zA-Z0-9+.-]*://|[a-zA-Z][a-zA-Z0-9+.-]*:|www\.)").unwrap());
100
/// File extensions (lowercase, including the leading dot) that identify a
/// path as a Markdown file.
const MARKDOWN_EXTENSIONS: &[&str] = &[
    ".md",
    ".markdown",
    ".mdx",
    ".mkd",
    ".mkdn",
    ".mdown",
    ".mdwn",
    ".qmd",
    ".rmd",
];
113
114#[inline]
116fn is_markdown_file(path: &str) -> bool {
117 let path_lower = path.to_lowercase();
118 MARKDOWN_EXTENSIONS.iter().any(|ext| path_lower.ends_with(ext))
119}
120
/// Returns the part of `url` before the first `?` or `#`, whichever comes
/// first. If neither is present the whole string is returned.
fn strip_query_and_fragment(url: &str) -> &str {
    match url.find(|c: char| c == '?' || c == '#') {
        Some(cut) => &url[..cut],
        None => url,
    }
}
134
135pub fn extract_cross_file_links(ctx: &LintContext) -> Vec<CrossFileLinkIndex> {
143 let content = ctx.content;
144
145 if content.is_empty() || !content.contains("](") {
147 return Vec::new();
148 }
149
150 let mut links = Vec::new();
151 let lines: Vec<&str> = content.lines().collect();
152 let element_cache = ElementCache::new(content);
153 let line_index = &ctx.line_index;
154
155 let mut processed_lines = HashSet::new();
158
159 for link in &ctx.links {
160 let line_idx = link.line - 1;
161 if line_idx >= lines.len() {
162 continue;
163 }
164
165 if !processed_lines.insert(line_idx) {
167 continue;
168 }
169
170 let line = lines[line_idx];
171 if !line.contains("](") {
172 continue;
173 }
174
175 for link_match in LINK_START_REGEX.find_iter(line) {
177 let start_pos = link_match.start();
178 let end_pos = link_match.end();
179
180 let line_start_byte = line_index.get_line_start_byte(line_idx + 1).unwrap_or(0);
182 let absolute_start_pos = line_start_byte + start_pos;
183
184 if element_cache.is_in_code_span(absolute_start_pos) {
186 continue;
187 }
188
189 let caps_result = URL_EXTRACT_ANGLE_BRACKET_REGEX
192 .captures_at(line, end_pos - 1)
193 .or_else(|| URL_EXTRACT_REGEX.captures_at(line, end_pos - 1));
194
195 if let Some(caps) = caps_result
196 && let Some(url_group) = caps.get(1)
197 {
198 let file_path = url_group.as_str().trim();
199
200 if file_path.is_empty()
203 || PROTOCOL_DOMAIN_REGEX.is_match(file_path)
204 || file_path.starts_with("www.")
205 || file_path.starts_with('#')
206 || file_path.starts_with("{{")
207 || file_path.starts_with("{%")
208 || file_path.starts_with('/')
209 || file_path.starts_with('~')
210 || file_path.starts_with('@')
211 || (file_path.starts_with('`') && file_path.ends_with('`'))
212 {
213 continue;
214 }
215
216 let file_path = strip_query_and_fragment(file_path);
218
219 let fragment = caps.get(2).map(|m| m.as_str().trim_start_matches('#')).unwrap_or("");
221
222 if is_markdown_file(file_path) {
224 links.push(CrossFileLinkIndex {
225 target_path: file_path.to_string(),
226 fragment: fragment.to_string(),
227 line: link.line,
228 column: url_group.start() + 1,
229 });
230 }
231 }
232 }
233 }
234
235 links
236}
237
/// Four-byte magic header written at the start of the cache file and checked
/// when loading it.
#[cfg(feature = "native")]
const CACHE_MAGIC: &[u8; 4] = b"RWSI";
241
/// Cache format version, stored little-endian right after the magic header.
/// A cache file with a different version is discarded on load.
#[cfg(feature = "native")]
const CACHE_FORMAT_VERSION: u32 = 5;
245
/// File name of the serialized workspace index inside the cache directory.
#[cfg(feature = "native")]
const CACHE_FILE_NAME: &str = "workspace_index.bin";
249
/// Index over all files of a workspace, plus a reverse dependency graph of
/// the cross-file links between them.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct WorkspaceIndex {
    /// Per-file index data, keyed by file path.
    files: HashMap<PathBuf, FileIndex>,
    /// Maps a link target to the set of files that link to it.
    reverse_deps: HashMap<PathBuf, HashSet<PathBuf>>,
    /// Counter bumped (wrapping) whenever the index is modified.
    version: u64,
}
264
/// Index data extracted from a single file.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct FileIndex {
    /// Headings of the file, in insertion order.
    pub headings: Vec<HeadingIndex>,
    /// Reference-style link usages recorded for the file.
    pub reference_links: Vec<ReferenceLinkIndex>,
    /// Links to other files; `add_cross_file_link` keeps these free of
    /// duplicates (same target, fragment and line).
    pub cross_file_links: Vec<CrossFileLinkIndex>,
    /// Reference IDs that are defined in the file.
    pub defined_references: HashSet<String>,
    /// Hash of the file content, used to detect stale entries.
    pub content_hash: String,
    /// Lowercased anchor -> index into `headings`.
    anchor_to_heading: HashMap<String, usize>,
    /// Lowercased anchors registered through `add_html_anchor`.
    html_anchors: HashSet<String>,
    /// Lowercased anchors registered through `add_attribute_anchor`.
    attribute_anchors: HashSet<String>,
    /// Rules disabled for the whole file; `"*"` stands for every rule.
    pub file_disabled_rules: HashSet<String>,
    /// `(line, disabled rules, enabled rules)` transitions. Looked up with a
    /// binary search on `line`, so the entries must be sorted by line.
    pub persistent_transitions: Vec<(usize, HashSet<String>, HashSet<String>)>,
    /// Rules disabled on individual lines; `"*"` stands for every rule.
    pub line_disabled_rules: HashMap<usize, HashSet<String>>,
}
298
/// A heading recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HeadingIndex {
    /// Heading text.
    pub text: String,
    /// Automatically generated anchor for the heading.
    pub auto_anchor: String,
    /// Explicitly assigned anchor, if the heading has one.
    pub custom_anchor: Option<String>,
    /// Line of the heading in its file.
    pub line: usize,
}
311
/// A reference-style link usage recorded in a [`FileIndex`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReferenceLinkIndex {
    /// ID of the reference the link uses.
    pub reference_id: String,
    /// Line of the link in its file.
    pub line: usize,
    /// Column of the link on that line.
    pub column: usize,
}
322
/// A link from one file to another Markdown file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CrossFileLinkIndex {
    /// Link target as written in the source, without query or fragment.
    pub target_path: String,
    /// Fragment of the link without the leading `#`; empty if there is none.
    pub fragment: String,
    /// 1-based line of the link in the source file.
    pub line: usize,
    /// 1-based byte offset of the URL within its line.
    pub column: usize,
}
335
/// A heading that only has an auto-generated anchor (no custom anchor), as
/// reported by `WorkspaceIndex::get_vulnerable_anchors`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VulnerableAnchor {
    /// File that contains the heading.
    pub file: PathBuf,
    /// Line of the heading in that file.
    pub line: usize,
    /// Heading text.
    pub text: String,
}
346
347impl WorkspaceIndex {
348 pub fn new() -> Self {
350 Self::default()
351 }
352
353 pub fn version(&self) -> u64 {
355 self.version
356 }
357
358 pub fn file_count(&self) -> usize {
360 self.files.len()
361 }
362
363 pub fn contains_file(&self, path: &Path) -> bool {
365 self.files.contains_key(path)
366 }
367
368 pub fn get_file(&self, path: &Path) -> Option<&FileIndex> {
370 self.files.get(path)
371 }
372
373 pub fn insert_file(&mut self, path: PathBuf, index: FileIndex) {
375 self.files.insert(path, index);
376 self.version = self.version.wrapping_add(1);
377 }
378
379 pub fn remove_file(&mut self, path: &Path) -> Option<FileIndex> {
381 self.clear_reverse_deps_for(path);
383
384 let result = self.files.remove(path);
385 if result.is_some() {
386 self.version = self.version.wrapping_add(1);
387 }
388 result
389 }
390
391 pub fn get_vulnerable_anchors(&self) -> HashMap<String, Vec<VulnerableAnchor>> {
401 let mut vulnerable: HashMap<String, Vec<VulnerableAnchor>> = HashMap::new();
402
403 for (file_path, file_index) in &self.files {
404 for heading in &file_index.headings {
405 if heading.custom_anchor.is_none() && !heading.auto_anchor.is_empty() {
407 let anchor_key = heading.auto_anchor.to_lowercase();
408 vulnerable.entry(anchor_key).or_default().push(VulnerableAnchor {
409 file: file_path.clone(),
410 line: heading.line,
411 text: heading.text.clone(),
412 });
413 }
414 }
415 }
416
417 vulnerable
418 }
419
420 pub fn all_headings(&self) -> impl Iterator<Item = (&Path, &HeadingIndex)> {
422 self.files
423 .iter()
424 .flat_map(|(path, index)| index.headings.iter().map(move |h| (path.as_path(), h)))
425 }
426
427 pub fn files(&self) -> impl Iterator<Item = (&Path, &FileIndex)> {
429 self.files.iter().map(|(p, i)| (p.as_path(), i))
430 }
431
432 pub fn clear(&mut self) {
434 self.files.clear();
435 self.reverse_deps.clear();
436 self.version = self.version.wrapping_add(1);
437 }
438
439 pub fn update_file(&mut self, path: &Path, index: FileIndex) {
446 self.clear_reverse_deps_as_source(path);
449
450 for link in &index.cross_file_links {
452 let target = self.resolve_target_path(path, &link.target_path);
453 self.reverse_deps.entry(target).or_default().insert(path.to_path_buf());
454 }
455
456 self.files.insert(path.to_path_buf(), index);
457 self.version = self.version.wrapping_add(1);
458 }
459
460 pub fn get_dependents(&self, path: &Path) -> Vec<PathBuf> {
465 self.reverse_deps
466 .get(path)
467 .map(|set| set.iter().cloned().collect())
468 .unwrap_or_default()
469 }
470
471 pub fn is_file_stale(&self, path: &Path, current_hash: &str) -> bool {
475 self.files
476 .get(path)
477 .map(|f| f.content_hash != current_hash)
478 .unwrap_or(true)
479 }
480
481 pub fn retain_only(&mut self, current_files: &std::collections::HashSet<PathBuf>) -> usize {
486 let before_count = self.files.len();
487
488 let to_remove: Vec<PathBuf> = self
490 .files
491 .keys()
492 .filter(|path| !current_files.contains(*path))
493 .cloned()
494 .collect();
495
496 for path in &to_remove {
498 self.remove_file(path);
499 }
500
501 before_count - self.files.len()
502 }
503
504 #[cfg(feature = "native")]
511 pub fn save_to_cache(&self, cache_dir: &Path) -> std::io::Result<()> {
512 use std::fs;
513 use std::io::Write;
514
515 fs::create_dir_all(cache_dir)?;
517
518 let encoded = postcard::to_allocvec(self)
520 .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string()))?;
521
522 let mut cache_data = Vec::with_capacity(8 + encoded.len());
524 cache_data.extend_from_slice(CACHE_MAGIC);
525 cache_data.extend_from_slice(&CACHE_FORMAT_VERSION.to_le_bytes());
526 cache_data.extend_from_slice(&encoded);
527
528 let final_path = cache_dir.join(CACHE_FILE_NAME);
530 let temp_path = cache_dir.join(format!("{}.tmp.{}", CACHE_FILE_NAME, std::process::id()));
531
532 {
534 let mut file = fs::File::create(&temp_path)?;
535 file.write_all(&cache_data)?;
536 file.sync_all()?;
537 }
538
539 fs::rename(&temp_path, &final_path)?;
541
542 log::debug!(
543 "Saved workspace index to cache: {} files, {} bytes (format v{})",
544 self.files.len(),
545 cache_data.len(),
546 CACHE_FORMAT_VERSION
547 );
548
549 Ok(())
550 }
551
552 #[cfg(feature = "native")]
560 pub fn load_from_cache(cache_dir: &Path) -> Option<Self> {
561 use std::fs;
562
563 let path = cache_dir.join(CACHE_FILE_NAME);
564 let data = fs::read(&path).ok()?;
565
566 if data.len() < 8 {
568 log::warn!("Workspace index cache too small, discarding");
569 let _ = fs::remove_file(&path);
570 return None;
571 }
572
573 if &data[0..4] != CACHE_MAGIC {
575 log::warn!("Workspace index cache has invalid magic header, discarding");
576 let _ = fs::remove_file(&path);
577 return None;
578 }
579
580 let version = u32::from_le_bytes([data[4], data[5], data[6], data[7]]);
582 if version != CACHE_FORMAT_VERSION {
583 log::info!(
584 "Workspace index cache format version mismatch (got {version}, expected {CACHE_FORMAT_VERSION}), rebuilding"
585 );
586 let _ = fs::remove_file(&path);
587 return None;
588 }
589
590 match postcard::from_bytes::<Self>(&data[8..]) {
592 Ok(index) => {
593 log::debug!(
594 "Loaded workspace index from cache: {} files (format v{})",
595 index.files.len(),
596 version
597 );
598 Some(index)
599 }
600 Err(e) => {
601 log::warn!("Failed to deserialize workspace index cache: {e}");
602 let _ = fs::remove_file(&path);
603 None
604 }
605 }
606 }
607
608 fn clear_reverse_deps_as_source(&mut self, path: &Path) {
613 for deps in self.reverse_deps.values_mut() {
614 deps.remove(path);
615 }
616 self.reverse_deps.retain(|_, deps| !deps.is_empty());
618 }
619
620 fn clear_reverse_deps_for(&mut self, path: &Path) {
625 self.clear_reverse_deps_as_source(path);
627
628 self.reverse_deps.remove(path);
630 }
631
632 fn resolve_target_path(&self, source_file: &Path, relative_target: &str) -> PathBuf {
634 let source_dir = source_file.parent().unwrap_or(Path::new(""));
636
637 let target = source_dir.join(relative_target);
639
640 Self::normalize_path(&target)
642 }
643
644 fn normalize_path(path: &Path) -> PathBuf {
646 let mut components = Vec::new();
647
648 for component in path.components() {
649 match component {
650 std::path::Component::ParentDir => {
651 if !components.is_empty() {
653 components.pop();
654 }
655 }
656 std::path::Component::CurDir => {
657 }
659 _ => {
660 components.push(component);
661 }
662 }
663 }
664
665 components.iter().collect()
666 }
667}
668
669impl FileIndex {
670 pub fn new() -> Self {
672 Self::default()
673 }
674
675 pub fn with_hash(content_hash: String) -> Self {
677 Self {
678 content_hash,
679 ..Default::default()
680 }
681 }
682
683 pub fn add_heading(&mut self, heading: HeadingIndex) {
687 let index = self.headings.len();
688
689 self.anchor_to_heading.insert(heading.auto_anchor.to_lowercase(), index);
691
692 if let Some(ref custom) = heading.custom_anchor {
694 self.anchor_to_heading.insert(custom.to_lowercase(), index);
695 }
696
697 self.headings.push(heading);
698 }
699
700 pub fn add_anchor_alias(&mut self, anchor: String, heading_index: usize) {
703 if heading_index < self.headings.len() {
704 self.anchor_to_heading.insert(anchor.to_lowercase(), heading_index);
705 }
706 }
707
708 pub fn has_anchor(&self, anchor: &str) -> bool {
719 let lower = anchor.to_lowercase();
720
721 if self.anchor_to_heading.contains_key(&lower)
723 || self.html_anchors.contains(&lower)
724 || self.attribute_anchors.contains(&lower)
725 {
726 return true;
727 }
728
729 if anchor.contains('%') {
731 let decoded = url_decode(anchor).to_lowercase();
732 if decoded != lower {
733 return self.anchor_to_heading.contains_key(&decoded)
734 || self.html_anchors.contains(&decoded)
735 || self.attribute_anchors.contains(&decoded);
736 }
737 }
738
739 false
740 }
741
742 pub fn add_html_anchor(&mut self, anchor: String) {
744 if !anchor.is_empty() {
745 self.html_anchors.insert(anchor.to_lowercase());
746 }
747 }
748
749 pub fn add_attribute_anchor(&mut self, anchor: String) {
751 if !anchor.is_empty() {
752 self.attribute_anchors.insert(anchor.to_lowercase());
753 }
754 }
755
756 pub fn get_heading_by_anchor(&self, anchor: &str) -> Option<&HeadingIndex> {
760 self.anchor_to_heading
761 .get(&anchor.to_lowercase())
762 .and_then(|&idx| self.headings.get(idx))
763 }
764
765 pub fn add_reference_link(&mut self, link: ReferenceLinkIndex) {
767 self.reference_links.push(link);
768 }
769
770 pub fn is_rule_disabled_at_line(&self, rule_name: &str, line: usize) -> bool {
775 if self.file_disabled_rules.contains("*") || self.file_disabled_rules.contains(rule_name) {
777 return true;
778 }
779
780 if let Some(rules) = self.line_disabled_rules.get(&line)
782 && (rules.contains("*") || rules.contains(rule_name))
783 {
784 return true;
785 }
786
787 if !self.persistent_transitions.is_empty() {
789 let idx = match self.persistent_transitions.binary_search_by_key(&line, |t| t.0) {
790 Ok(i) => Some(i),
791 Err(i) => {
792 if i > 0 {
793 Some(i - 1)
794 } else {
795 None
796 }
797 }
798 };
799 if let Some(i) = idx {
800 let (_, ref disabled, ref enabled) = self.persistent_transitions[i];
801 if disabled.contains("*") {
802 return !enabled.contains(rule_name);
803 }
804 return disabled.contains(rule_name);
805 }
806 }
807
808 false
809 }
810
811 pub fn add_cross_file_link(&mut self, link: CrossFileLinkIndex) {
813 let is_duplicate = self.cross_file_links.iter().any(|existing| {
816 existing.target_path == link.target_path && existing.fragment == link.fragment && existing.line == link.line
817 });
818 if !is_duplicate {
819 self.cross_file_links.push(link);
820 }
821 }
822
823 pub fn add_defined_reference(&mut self, ref_id: String) {
825 self.defined_references.insert(ref_id);
826 }
827
828 pub fn has_defined_reference(&self, ref_id: &str) -> bool {
830 self.defined_references.contains(ref_id)
831 }
832
833 pub fn hash_matches(&self, hash: &str) -> bool {
835 self.content_hash == hash
836 }
837
838 pub fn heading_count(&self) -> usize {
840 self.headings.len()
841 }
842
843 pub fn reference_link_count(&self) -> usize {
845 self.reference_links.len()
846 }
847}
848
849#[cfg(test)]
850mod tests {
851 use super::*;
852
853 #[test]
854 fn test_workspace_index_basic() {
855 let mut index = WorkspaceIndex::new();
856 assert_eq!(index.file_count(), 0);
857 assert_eq!(index.version(), 0);
858
859 let mut file_index = FileIndex::with_hash("abc123".to_string());
860 file_index.add_heading(HeadingIndex {
861 text: "Installation".to_string(),
862 auto_anchor: "installation".to_string(),
863 custom_anchor: None,
864 line: 1,
865 });
866
867 index.insert_file(PathBuf::from("docs/install.md"), file_index);
868 assert_eq!(index.file_count(), 1);
869 assert_eq!(index.version(), 1);
870
871 assert!(index.contains_file(Path::new("docs/install.md")));
872 assert!(!index.contains_file(Path::new("docs/other.md")));
873 }
874
875 #[test]
876 fn test_vulnerable_anchors() {
877 let mut index = WorkspaceIndex::new();
878
879 let mut file1 = FileIndex::new();
881 file1.add_heading(HeadingIndex {
882 text: "Getting Started".to_string(),
883 auto_anchor: "getting-started".to_string(),
884 custom_anchor: None,
885 line: 1,
886 });
887 index.insert_file(PathBuf::from("docs/guide.md"), file1);
888
889 let mut file2 = FileIndex::new();
891 file2.add_heading(HeadingIndex {
892 text: "Installation".to_string(),
893 auto_anchor: "installation".to_string(),
894 custom_anchor: Some("install".to_string()),
895 line: 1,
896 });
897 index.insert_file(PathBuf::from("docs/install.md"), file2);
898
899 let vulnerable = index.get_vulnerable_anchors();
900 assert_eq!(vulnerable.len(), 1);
901 assert!(vulnerable.contains_key("getting-started"));
902 assert!(!vulnerable.contains_key("installation"));
903
904 let anchors = vulnerable.get("getting-started").unwrap();
905 assert_eq!(anchors.len(), 1);
906 assert_eq!(anchors[0].file, PathBuf::from("docs/guide.md"));
907 assert_eq!(anchors[0].text, "Getting Started");
908 }
909
910 #[test]
911 fn test_vulnerable_anchors_multiple_files_same_anchor() {
912 let mut index = WorkspaceIndex::new();
915
916 let mut file1 = FileIndex::new();
918 file1.add_heading(HeadingIndex {
919 text: "Installation".to_string(),
920 auto_anchor: "installation".to_string(),
921 custom_anchor: None,
922 line: 1,
923 });
924 index.insert_file(PathBuf::from("docs/en/guide.md"), file1);
925
926 let mut file2 = FileIndex::new();
928 file2.add_heading(HeadingIndex {
929 text: "Installation".to_string(),
930 auto_anchor: "installation".to_string(),
931 custom_anchor: None,
932 line: 5,
933 });
934 index.insert_file(PathBuf::from("docs/fr/guide.md"), file2);
935
936 let mut file3 = FileIndex::new();
938 file3.add_heading(HeadingIndex {
939 text: "Installation".to_string(),
940 auto_anchor: "installation".to_string(),
941 custom_anchor: Some("install".to_string()),
942 line: 10,
943 });
944 index.insert_file(PathBuf::from("docs/de/guide.md"), file3);
945
946 let vulnerable = index.get_vulnerable_anchors();
947 assert_eq!(vulnerable.len(), 1); assert!(vulnerable.contains_key("installation"));
949
950 let anchors = vulnerable.get("installation").unwrap();
951 assert_eq!(anchors.len(), 2, "Should collect both vulnerable anchors");
953
954 let files: std::collections::HashSet<_> = anchors.iter().map(|a| &a.file).collect();
956 assert!(files.contains(&PathBuf::from("docs/en/guide.md")));
957 assert!(files.contains(&PathBuf::from("docs/fr/guide.md")));
958 }
959
960 #[test]
961 fn test_file_index_hash() {
962 let index = FileIndex::with_hash("hash123".to_string());
963 assert!(index.hash_matches("hash123"));
964 assert!(!index.hash_matches("other"));
965 }
966
967 #[test]
968 fn test_version_increment() {
969 let mut index = WorkspaceIndex::new();
970 assert_eq!(index.version(), 0);
971
972 index.insert_file(PathBuf::from("a.md"), FileIndex::new());
973 assert_eq!(index.version(), 1);
974
975 index.insert_file(PathBuf::from("b.md"), FileIndex::new());
976 assert_eq!(index.version(), 2);
977
978 index.remove_file(Path::new("a.md"));
979 assert_eq!(index.version(), 3);
980
981 index.remove_file(Path::new("nonexistent.md"));
983 assert_eq!(index.version(), 3);
984 }
985
986 #[test]
987 fn test_reverse_deps_basic() {
988 let mut index = WorkspaceIndex::new();
989
990 let mut file_a = FileIndex::new();
992 file_a.add_cross_file_link(CrossFileLinkIndex {
993 target_path: "b.md".to_string(),
994 fragment: "section".to_string(),
995 line: 10,
996 column: 5,
997 });
998 index.update_file(Path::new("docs/a.md"), file_a);
999
1000 let dependents = index.get_dependents(Path::new("docs/b.md"));
1002 assert_eq!(dependents.len(), 1);
1003 assert_eq!(dependents[0], PathBuf::from("docs/a.md"));
1004
1005 let a_dependents = index.get_dependents(Path::new("docs/a.md"));
1007 assert!(a_dependents.is_empty());
1008 }
1009
1010 #[test]
1011 fn test_reverse_deps_multiple() {
1012 let mut index = WorkspaceIndex::new();
1013
1014 let mut file_a = FileIndex::new();
1016 file_a.add_cross_file_link(CrossFileLinkIndex {
1017 target_path: "../b.md".to_string(),
1018 fragment: "".to_string(),
1019 line: 1,
1020 column: 1,
1021 });
1022 index.update_file(Path::new("docs/sub/a.md"), file_a);
1023
1024 let mut file_c = FileIndex::new();
1025 file_c.add_cross_file_link(CrossFileLinkIndex {
1026 target_path: "b.md".to_string(),
1027 fragment: "".to_string(),
1028 line: 1,
1029 column: 1,
1030 });
1031 index.update_file(Path::new("docs/c.md"), file_c);
1032
1033 let dependents = index.get_dependents(Path::new("docs/b.md"));
1035 assert_eq!(dependents.len(), 2);
1036 assert!(dependents.contains(&PathBuf::from("docs/sub/a.md")));
1037 assert!(dependents.contains(&PathBuf::from("docs/c.md")));
1038 }
1039
1040 #[test]
1041 fn test_reverse_deps_update_clears_old() {
1042 let mut index = WorkspaceIndex::new();
1043
1044 let mut file_a = FileIndex::new();
1046 file_a.add_cross_file_link(CrossFileLinkIndex {
1047 target_path: "b.md".to_string(),
1048 fragment: "".to_string(),
1049 line: 1,
1050 column: 1,
1051 });
1052 index.update_file(Path::new("docs/a.md"), file_a);
1053
1054 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1056
1057 let mut file_a_updated = FileIndex::new();
1059 file_a_updated.add_cross_file_link(CrossFileLinkIndex {
1060 target_path: "c.md".to_string(),
1061 fragment: "".to_string(),
1062 line: 1,
1063 column: 1,
1064 });
1065 index.update_file(Path::new("docs/a.md"), file_a_updated);
1066
1067 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1069
1070 let c_deps = index.get_dependents(Path::new("docs/c.md"));
1072 assert_eq!(c_deps.len(), 1);
1073 assert_eq!(c_deps[0], PathBuf::from("docs/a.md"));
1074 }
1075
1076 #[test]
1077 fn test_reverse_deps_remove_file() {
1078 let mut index = WorkspaceIndex::new();
1079
1080 let mut file_a = FileIndex::new();
1082 file_a.add_cross_file_link(CrossFileLinkIndex {
1083 target_path: "b.md".to_string(),
1084 fragment: "".to_string(),
1085 line: 1,
1086 column: 1,
1087 });
1088 index.update_file(Path::new("docs/a.md"), file_a);
1089
1090 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1092
1093 index.remove_file(Path::new("docs/a.md"));
1095
1096 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1098 }
1099
1100 #[test]
1101 fn test_normalize_path() {
1102 let path = Path::new("docs/sub/../other.md");
1104 let normalized = WorkspaceIndex::normalize_path(path);
1105 assert_eq!(normalized, PathBuf::from("docs/other.md"));
1106
1107 let path2 = Path::new("docs/./other.md");
1109 let normalized2 = WorkspaceIndex::normalize_path(path2);
1110 assert_eq!(normalized2, PathBuf::from("docs/other.md"));
1111
1112 let path3 = Path::new("a/b/c/../../d.md");
1114 let normalized3 = WorkspaceIndex::normalize_path(path3);
1115 assert_eq!(normalized3, PathBuf::from("a/d.md"));
1116 }
1117
1118 #[test]
1119 fn test_clear_clears_reverse_deps() {
1120 let mut index = WorkspaceIndex::new();
1121
1122 let mut file_a = FileIndex::new();
1124 file_a.add_cross_file_link(CrossFileLinkIndex {
1125 target_path: "b.md".to_string(),
1126 fragment: "".to_string(),
1127 line: 1,
1128 column: 1,
1129 });
1130 index.update_file(Path::new("docs/a.md"), file_a);
1131
1132 assert_eq!(index.get_dependents(Path::new("docs/b.md")).len(), 1);
1134
1135 index.clear();
1137
1138 assert_eq!(index.file_count(), 0);
1140 assert!(index.get_dependents(Path::new("docs/b.md")).is_empty());
1141 }
1142
1143 #[test]
1144 fn test_is_file_stale() {
1145 let mut index = WorkspaceIndex::new();
1146
1147 assert!(index.is_file_stale(Path::new("nonexistent.md"), "hash123"));
1149
1150 let file_index = FileIndex::with_hash("hash123".to_string());
1152 index.insert_file(PathBuf::from("docs/test.md"), file_index);
1153
1154 assert!(!index.is_file_stale(Path::new("docs/test.md"), "hash123"));
1156
1157 assert!(index.is_file_stale(Path::new("docs/test.md"), "different_hash"));
1159 }
1160
1161 #[cfg(feature = "native")]
1162 #[test]
1163 fn test_cache_roundtrip() {
1164 use std::fs;
1165
1166 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_roundtrip");
1168 let _ = fs::remove_dir_all(&temp_dir);
1169 fs::create_dir_all(&temp_dir).unwrap();
1170
1171 let mut index = WorkspaceIndex::new();
1173
1174 let mut file1 = FileIndex::with_hash("abc123".to_string());
1175 file1.add_heading(HeadingIndex {
1176 text: "Test Heading".to_string(),
1177 auto_anchor: "test-heading".to_string(),
1178 custom_anchor: Some("test".to_string()),
1179 line: 1,
1180 });
1181 file1.add_cross_file_link(CrossFileLinkIndex {
1182 target_path: "./other.md".to_string(),
1183 fragment: "section".to_string(),
1184 line: 5,
1185 column: 3,
1186 });
1187 index.update_file(Path::new("docs/file1.md"), file1);
1188
1189 let mut file2 = FileIndex::with_hash("def456".to_string());
1190 file2.add_heading(HeadingIndex {
1191 text: "Another Heading".to_string(),
1192 auto_anchor: "another-heading".to_string(),
1193 custom_anchor: None,
1194 line: 1,
1195 });
1196 index.update_file(Path::new("docs/other.md"), file2);
1197
1198 index.save_to_cache(&temp_dir).expect("Failed to save cache");
1200
1201 assert!(temp_dir.join("workspace_index.bin").exists());
1203
1204 let loaded = WorkspaceIndex::load_from_cache(&temp_dir).expect("Failed to load cache");
1206
1207 assert_eq!(loaded.file_count(), 2);
1209 assert!(loaded.contains_file(Path::new("docs/file1.md")));
1210 assert!(loaded.contains_file(Path::new("docs/other.md")));
1211
1212 let file1_loaded = loaded.get_file(Path::new("docs/file1.md")).unwrap();
1214 assert_eq!(file1_loaded.content_hash, "abc123");
1215 assert_eq!(file1_loaded.headings.len(), 1);
1216 assert_eq!(file1_loaded.headings[0].text, "Test Heading");
1217 assert_eq!(file1_loaded.headings[0].custom_anchor, Some("test".to_string()));
1218 assert_eq!(file1_loaded.cross_file_links.len(), 1);
1219 assert_eq!(file1_loaded.cross_file_links[0].target_path, "./other.md");
1220
1221 let dependents = loaded.get_dependents(Path::new("docs/other.md"));
1223 assert_eq!(dependents.len(), 1);
1224 assert_eq!(dependents[0], PathBuf::from("docs/file1.md"));
1225
1226 let _ = fs::remove_dir_all(&temp_dir);
1228 }
1229
1230 #[cfg(feature = "native")]
1231 #[test]
1232 fn test_cache_missing_file() {
1233 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_missing");
1234 let _ = std::fs::remove_dir_all(&temp_dir);
1235
1236 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1238 assert!(result.is_none());
1239 }
1240
1241 #[cfg(feature = "native")]
1242 #[test]
1243 fn test_cache_corrupted_file() {
1244 use std::fs;
1245
1246 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_corrupted");
1247 let _ = fs::remove_dir_all(&temp_dir);
1248 fs::create_dir_all(&temp_dir).unwrap();
1249
1250 fs::write(temp_dir.join("workspace_index.bin"), b"bad").unwrap();
1252
1253 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1255 assert!(result.is_none());
1256
1257 assert!(!temp_dir.join("workspace_index.bin").exists());
1259
1260 let _ = fs::remove_dir_all(&temp_dir);
1262 }
1263
1264 #[cfg(feature = "native")]
1265 #[test]
1266 fn test_cache_invalid_magic() {
1267 use std::fs;
1268
1269 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_invalid_magic");
1270 let _ = fs::remove_dir_all(&temp_dir);
1271 fs::create_dir_all(&temp_dir).unwrap();
1272
1273 let mut data = Vec::new();
1275 data.extend_from_slice(b"XXXX"); data.extend_from_slice(&1u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1279
1280 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1282 assert!(result.is_none());
1283
1284 assert!(!temp_dir.join("workspace_index.bin").exists());
1286
1287 let _ = fs::remove_dir_all(&temp_dir);
1289 }
1290
1291 #[cfg(feature = "native")]
1292 #[test]
1293 fn test_cache_version_mismatch() {
1294 use std::fs;
1295
1296 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_version_mismatch");
1297 let _ = fs::remove_dir_all(&temp_dir);
1298 fs::create_dir_all(&temp_dir).unwrap();
1299
1300 let mut data = Vec::new();
1302 data.extend_from_slice(b"RWSI"); data.extend_from_slice(&999u32.to_le_bytes()); data.extend_from_slice(&[0; 100]); fs::write(temp_dir.join("workspace_index.bin"), &data).unwrap();
1306
1307 let result = WorkspaceIndex::load_from_cache(&temp_dir);
1309 assert!(result.is_none());
1310
1311 assert!(!temp_dir.join("workspace_index.bin").exists());
1313
1314 let _ = fs::remove_dir_all(&temp_dir);
1316 }
1317
1318 #[cfg(feature = "native")]
1319 #[test]
1320 fn test_cache_atomic_write() {
1321 use std::fs;
1322
1323 let temp_dir = std::env::temp_dir().join("rumdl_test_cache_atomic");
1325 let _ = fs::remove_dir_all(&temp_dir);
1326 fs::create_dir_all(&temp_dir).unwrap();
1327
1328 let index = WorkspaceIndex::new();
1329 index.save_to_cache(&temp_dir).expect("Failed to save");
1330
1331 let entries: Vec<_> = fs::read_dir(&temp_dir).unwrap().collect();
1333 assert_eq!(entries.len(), 1);
1334 assert!(temp_dir.join("workspace_index.bin").exists());
1335
1336 let _ = fs::remove_dir_all(&temp_dir);
1338 }
1339
1340 #[test]
1341 fn test_has_anchor_auto_generated() {
1342 let mut file_index = FileIndex::new();
1343 file_index.add_heading(HeadingIndex {
1344 text: "Installation Guide".to_string(),
1345 auto_anchor: "installation-guide".to_string(),
1346 custom_anchor: None,
1347 line: 1,
1348 });
1349
1350 assert!(file_index.has_anchor("installation-guide"));
1352
1353 assert!(file_index.has_anchor("Installation-Guide"));
1355 assert!(file_index.has_anchor("INSTALLATION-GUIDE"));
1356
1357 assert!(!file_index.has_anchor("nonexistent"));
1359 }
1360
1361 #[test]
1362 fn test_has_anchor_custom() {
1363 let mut file_index = FileIndex::new();
1364 file_index.add_heading(HeadingIndex {
1365 text: "Installation Guide".to_string(),
1366 auto_anchor: "installation-guide".to_string(),
1367 custom_anchor: Some("install".to_string()),
1368 line: 1,
1369 });
1370
1371 assert!(file_index.has_anchor("installation-guide"));
1373
1374 assert!(file_index.has_anchor("install"));
1376 assert!(file_index.has_anchor("Install")); assert!(!file_index.has_anchor("nonexistent"));
1380 }
1381
#[test]
fn test_get_heading_by_anchor() {
    // Two headings: one with a custom anchor, one with only an auto anchor.
    let mut file_index = FileIndex::new();
    file_index.add_heading(HeadingIndex {
        text: "Installation Guide".to_string(),
        auto_anchor: "installation-guide".to_string(),
        custom_anchor: Some("install".to_string()),
        line: 10,
    });
    file_index.add_heading(HeadingIndex {
        text: "Configuration".to_string(),
        auto_anchor: "configuration".to_string(),
        custom_anchor: None,
        line: 20,
    });

    // Lookup through the auto-generated anchor. `expect` replaces the
    // `is_some()` + repeated `unwrap()` pattern so a failure names the anchor.
    let heading = file_index
        .get_heading_by_anchor("installation-guide")
        .expect("auto anchor `installation-guide` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");
    assert_eq!(heading.line, 10);

    // Lookup through the custom anchor resolves to the same heading.
    let heading = file_index
        .get_heading_by_anchor("install")
        .expect("custom anchor `install` should resolve to a heading");
    assert_eq!(heading.text, "Installation Guide");

    // Lookup of the second heading through its auto-generated anchor.
    let heading = file_index
        .get_heading_by_anchor("configuration")
        .expect("auto anchor `configuration` should resolve to a heading");
    assert_eq!(heading.text, "Configuration");
    assert_eq!(heading.line, 20);

    // Unknown anchors yield no heading.
    assert!(file_index.get_heading_by_anchor("nonexistent").is_none());
}
1418
#[test]
fn test_anchor_lookup_many_headings() {
    let mut file_index = FileIndex::new();

    // Populate the index with 100 headings, each with an auto anchor
    // (`heading-N`) and a custom anchor (`hN`), on lines 1..=100.
    for i in 0..100 {
        file_index.add_heading(HeadingIndex {
            text: format!("Heading {i}"),
            auto_anchor: format!("heading-{i}"),
            custom_anchor: Some(format!("h{i}")),
            line: i + 1,
        });
    }

    // Every heading must be reachable through both of its anchors, and the
    // auto anchor must map back to the line the heading was registered with.
    for i in 0..100 {
        // Build each anchor once and reuse it for all checks in this iteration.
        let auto_anchor = format!("heading-{i}");
        let custom_anchor = format!("h{i}");

        assert!(
            file_index.has_anchor(&auto_anchor),
            "auto anchor {auto_anchor:?} should be found"
        );
        assert!(
            file_index.has_anchor(&custom_anchor),
            "custom anchor {custom_anchor:?} should be found"
        );

        // `unwrap_or_else` builds the failure message only when the lookup fails.
        let heading = file_index
            .get_heading_by_anchor(&auto_anchor)
            .unwrap_or_else(|| panic!("auto anchor {auto_anchor:?} should resolve to a heading"));
        assert_eq!(heading.line, i + 1);
    }
}
1444
#[test]
fn test_extract_cross_file_links_basic() {
    use crate::config::MarkdownFlavor;

    // A heading, a blank line, then one relative Markdown link on line 3.
    let markdown = "# Test\n\nSee [link](./other.md) for info.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    assert_eq!(link.target_path, "./other.md");
    // No `#fragment` in the URL, so the fragment is expected to be empty.
    assert_eq!(link.fragment, "");
    assert_eq!(link.line, 3);
    // Column 12 is the 1-based position of the `.` that starts the URL.
    assert_eq!(link.column, 12);
}
1464
#[test]
fn test_extract_cross_file_links_with_fragment() {
    use crate::config::MarkdownFlavor;

    // One link whose URL carries a `#install` fragment.
    let markdown = "Check [guide](./guide.md#install) here.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    // The path and the fragment are reported separately, without the `#`.
    assert_eq!(link.target_path, "./guide.md");
    assert_eq!(link.fragment, "install");
    assert_eq!(link.line, 1);
    // Column 15 is the 1-based position of the first URL character.
    assert_eq!(link.column, 15);
}
1480
#[test]
fn test_extract_cross_file_links_multiple_on_same_line() {
    use crate::config::MarkdownFlavor;

    // Two Markdown links share line 1; each must get its own column.
    let markdown = "See [a](a.md) and [b](b.md) here.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 2);

    // First link: URL starts at 1-based column 9.
    let first = &found[0];
    assert_eq!(first.target_path, "a.md");
    assert_eq!(first.line, 1);
    assert_eq!(first.column, 9);

    // Second link: URL starts at 1-based column 23.
    let second = &found[1];
    assert_eq!(second.target_path, "b.md");
    assert_eq!(second.line, 1);
    assert_eq!(second.column, 23);
}
1501
#[test]
fn test_extract_cross_file_links_angle_brackets() {
    use crate::config::MarkdownFlavor;

    // The angle-bracket URL form lets the path contain spaces and parentheses.
    let markdown = "See [link](<path/with (parens).md>) here.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let link = &found[0];
    // The surrounding `<` and `>` are not part of the reported path.
    assert_eq!(link.target_path, "path/with (parens).md");
    assert_eq!(link.line, 1);
    // Column 13 is the first path character, just after the opening `<`.
    assert_eq!(link.column, 13);
}
1516
#[test]
fn test_extract_cross_file_links_skips_external() {
    use crate::config::MarkdownFlavor;

    // A mix of link kinds; only the relative Markdown file link is expected
    // to be returned. The URL, `mailto:`, fragment-only and absolute-path
    // links are expected to be left out.
    let markdown = r#"
[external](https://example.com)
[mailto](mailto:test@example.com)
[local](./local.md)
[fragment](#section)
[absolute](/docs/page.md)
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    let only = &found[0];
    assert_eq!(only.target_path, "./local.md");
}
1535
#[test]
fn test_extract_cross_file_links_skips_non_markdown() {
    use crate::config::MarkdownFlavor;

    // Three relative links, of which only one targets a Markdown file.
    let markdown = r#"
[image](./photo.png)
[doc](./readme.md)
[pdf](./document.pdf)
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    // The `.png` and `.pdf` targets are expected to be ignored.
    assert_eq!(found.len(), 1);

    let only = &found[0];
    assert_eq!(only.target_path, "./readme.md");
}
1552
#[test]
fn test_extract_cross_file_links_skips_code_spans() {
    use crate::config::MarkdownFlavor;

    // One real link plus link-shaped text inside an inline code span.
    let markdown = "Normal [link](./file.md) and `[code](./ignored.md)` here.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    // Only the link outside the backticks is expected.
    assert_eq!(found.len(), 1);

    let only = &found[0];
    assert_eq!(only.target_path, "./file.md");
}
1565
#[test]
fn test_extract_cross_file_links_with_query_params() {
    use crate::config::MarkdownFlavor;

    // The link URL carries a `?raw=true` query string after the file name.
    let markdown = "See [doc](./file.md?raw=true) here.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 1);

    // The reported target path is expected without the query string.
    let only = &found[0];
    assert_eq!(only.target_path, "./file.md");
}
1578
#[test]
fn test_extract_cross_file_links_empty_content() {
    use crate::config::MarkdownFlavor;

    // An empty document must produce no cross-file links.
    let lint_ctx = LintContext::new("", MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert!(found.is_empty());
}
1589
#[test]
fn test_extract_cross_file_links_no_links() {
    use crate::config::MarkdownFlavor;

    // A heading and a paragraph, neither of which contains a link.
    let markdown = "# Just a heading\n\nSome text without links.\n";
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert!(found.is_empty());
}
1600
#[test]
fn test_extract_cross_file_links_position_accuracy_issue_234() {
    use crate::config::MarkdownFlavor;

    // Two links on different lines (3 and 5); each must be reported with the
    // line it appears on and the 1-based column where its URL starts.
    let markdown = r#"# Test Document

Here is a [broken link](nonexistent-file.md) that should trigger MD057.

And another [link](also-missing.md) on this line.
"#;
    let lint_ctx = LintContext::new(markdown, MarkdownFlavor::default(), None);
    let found = extract_cross_file_links(&lint_ctx);

    assert_eq!(found.len(), 2);

    // First link: line 3, URL begins at column 25.
    let first = &found[0];
    assert_eq!(first.target_path, "nonexistent-file.md");
    assert_eq!(first.line, 3);
    assert_eq!(first.column, 25);

    // Second link: line 5, URL begins at column 20.
    let second = &found[1];
    assert_eq!(second.target_path, "also-missing.md");
    assert_eq!(second.line, 5);
    assert_eq!(second.column, 20);
}
1628}