Skip to main content

zccache_depgraph/
scanner.rs

1//! `#include` directive scanner.
2//!
3//! Scans C/C++ source files for `#include` directives, skipping comments
4//! and string literals. Does not evaluate preprocessor conditionals —
5//! all `#include` directives are returned unconditionally.
6
7use std::path::Path;
8
9use crate::search_paths::IncludeSearchPaths;
10use zccache_core::NormalizedPath;
11
/// The kind of `#include` directive.
///
/// Classification is purely textual: it is decided by the first character
/// of the directive's operand (`"`, `<`, or an identifier). No macro
/// expansion is performed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum IncludeKind {
    /// `#include "foo.h"` — quoted include.
    Quoted,
    /// `#include <foo.h>` — angle-bracket include.
    AngleBracket,
    /// `#include MACRO` — computed include, cannot resolve by text scanning.
    /// The payload is the macro identifier as written in the source.
    Computed(String),
}
22
/// A parsed `#include` directive.
///
/// Produced by the scanner for every directive it recognises; the operand
/// is stored exactly as written, without any path resolution.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct IncludeDirective {
    /// The kind of include.
    pub kind: IncludeKind,
    /// The path as written in the source (for Quoted/AngleBracket),
    /// or the macro name (for Computed).
    pub path: String,
    /// 1-based line number in the file.
    ///
    /// The line parser emits `0` as a placeholder; `scan_includes_str`
    /// overwrites it with the real source line before returning.
    pub line: u32,
}
34
/// Result of a recursive include scan.
///
/// Returned by `scan_recursive`. The scanned root file itself is marked as
/// visited but is not listed in `resolved`.
#[derive(Debug, Clone)]
pub struct ScanResult {
    /// All resolved include paths (absolute, deduplicated).
    pub resolved: Vec<NormalizedPath>,
    /// Include paths that could not be resolved to an existing file.
    ///
    /// Stored as written in the directive. Entries are not deduplicated:
    /// the same missing header referenced from several files appears once
    /// per reference.
    pub unresolved: Vec<String>,
    /// True if any `#include MACRO` (computed include) was found.
    pub has_computed: bool,
}
45
46/// Scan a source string for `#include` directives.
47///
48/// Skips directives inside `//` line comments, `/* */` block comments,
49/// and string/character literals. Handles backslash line continuations.
50pub fn scan_includes_str(source: &str) -> Vec<IncludeDirective> {
51    let joined = join_continuations(source);
52    let mut results = Vec::new();
53
54    // Track original line numbers: each line in `joined` maps to a source line.
55    // After joining continuations, we need to track the starting line of each
56    // logical line.
57    let line_map = build_line_map(source);
58
59    let mut in_block_comment = false;
60
61    for (logical_idx, line) in joined.lines().enumerate() {
62        let source_line = if logical_idx < line_map.len() {
63            line_map[logical_idx]
64        } else {
65            (logical_idx + 1) as u32
66        };
67
68        if in_block_comment {
69            if let Some(end) = line.find("*/") {
70                // Block comment ends on this line. Check rest of line.
71                let rest = &line[end + 2..];
72                if let Some(dir) = parse_include_from_line(rest) {
73                    results.push(IncludeDirective {
74                        line: source_line,
75                        ..dir
76                    });
77                }
78                in_block_comment = false;
79                // Could have another block comment start after...
80                if rest.contains("/*") {
81                    let after_end = rest.find("/*").unwrap();
82                    if !rest[..after_end].contains("*/") {
83                        in_block_comment = true;
84                    }
85                }
86            }
87            continue;
88        }
89
90        // Strip line comments first.
91        let effective = strip_comments(line, &mut in_block_comment);
92        if let Some(dir) = parse_include_from_line(&effective) {
93            results.push(IncludeDirective {
94                line: source_line,
95                ..dir
96            });
97        }
98    }
99
100    results
101}
102
103/// Scan a file on disk for `#include` directives.
104///
105/// # Errors
106///
107/// Returns an error if the file cannot be read.
108pub fn scan_includes(path: &Path) -> std::io::Result<Vec<IncludeDirective>> {
109    let source = std::fs::read_to_string(path)?;
110    Ok(scan_includes_str(&source))
111}
112
113/// Resolve a single `#include` directive to an absolute path.
114///
115/// For quoted includes, searches the including file's directory first,
116/// then `-iquote`, `-I`, `-isystem`, `-idirafter` in order.
117///
118/// For angle-bracket includes, searches `-I`, `-isystem`, `-idirafter`.
119///
120/// Returns `None` if the file is not found in any search path.
121pub fn resolve_include(
122    directive: &IncludeDirective,
123    search: &IncludeSearchPaths,
124    including_file_dir: &Path,
125) -> Option<NormalizedPath> {
126    match &directive.kind {
127        IncludeKind::Quoted => {
128            // 1. Directory of the including file.
129            let candidate = including_file_dir.join(&directive.path);
130            if candidate.is_file() {
131                return Some(normalize(&candidate));
132            }
133            // 2. Search paths for quoted includes.
134            for dir in search.quoted_search_dirs() {
135                let candidate = dir.join(&directive.path);
136                if candidate.is_file() {
137                    return Some(normalize(&candidate));
138                }
139            }
140            None
141        }
142        IncludeKind::AngleBracket => {
143            for dir in search.angle_search_dirs() {
144                let candidate = dir.join(&directive.path);
145                if candidate.is_file() {
146                    return Some(normalize(&candidate));
147                }
148            }
149            None
150        }
151        IncludeKind::Computed(_) => None,
152    }
153}
154
155/// Recursively scan a source file for all transitive includes.
156///
157/// Builds the full include list by scanning the source file, resolving
158/// each `#include`, then scanning each resolved header, and so on, using
159/// a parallel BFS over per-level frontiers. Headers within a frontier are
160/// read and parsed in parallel via rayon; new resolutions feed the next
161/// frontier. A `DashSet` deduplicates so each header is scanned exactly
162/// once across the DAG, even with circular or diamond includes.
163///
164/// `resolved` returns in BFS-level order (was DFS-post-order before
165/// parallelization). Callers in `graph.rs` only iterate the list to hash
166/// all files; no order invariant is broken.
167pub fn scan_recursive(source: &Path, search: &IncludeSearchPaths) -> ScanResult {
168    use dashmap::DashSet;
169    use rayon::prelude::*;
170    use std::sync::atomic::{AtomicBool, Ordering};
171    use std::sync::Mutex;
172
173    let visited: DashSet<NormalizedPath> = DashSet::new();
174    let resolved: Mutex<Vec<NormalizedPath>> = Mutex::new(Vec::new());
175    let unresolved: Mutex<Vec<String>> = Mutex::new(Vec::new());
176    let has_computed = AtomicBool::new(false);
177
178    // Mark the source itself as visited so we don't re-scan it via a
179    // self-include chain.
180    if let Some(abs) = try_normalize(source) {
181        visited.insert(abs);
182    }
183
184    let mut frontier: Vec<NormalizedPath> = vec![NormalizedPath::from(source)];
185    while !frontier.is_empty() {
186        let next: Vec<NormalizedPath> = frontier
187            .par_iter()
188            .flat_map_iter(|file| {
189                scan_one_level(
190                    file.as_path(),
191                    search,
192                    &visited,
193                    &resolved,
194                    &unresolved,
195                    &has_computed,
196                )
197            })
198            .collect();
199        frontier = next;
200    }
201
202    ScanResult {
203        resolved: resolved.into_inner().expect("resolved mutex poisoned"),
204        unresolved: unresolved.into_inner().expect("unresolved mutex poisoned"),
205        has_computed: has_computed.load(Ordering::Relaxed),
206    }
207}
208
209/// Scan one file: read it, parse `#include`s, resolve each, and return the
210/// list of newly-discovered resolved paths for the next frontier level.
211///
212/// All four shared collections take exactly one lock per scanned file: the
213/// per-file results are buffered locally and pushed in a single batch at
214/// the end. This keeps Mutex contention proportional to (file count) and
215/// not to (include count).
216fn scan_one_level(
217    file: &Path,
218    search: &IncludeSearchPaths,
219    visited: &dashmap::DashSet<NormalizedPath>,
220    resolved: &std::sync::Mutex<Vec<NormalizedPath>>,
221    unresolved: &std::sync::Mutex<Vec<String>>,
222    has_computed: &std::sync::atomic::AtomicBool,
223) -> Vec<NormalizedPath> {
224    let directives = match scan_includes(file) {
225        Ok(d) => d,
226        Err(_) => return Vec::new(),
227    };
228
229    let file_dir = file.parent().unwrap_or(Path::new("."));
230
231    let mut new_for_next: Vec<NormalizedPath> = Vec::new();
232    let mut local_resolved: Vec<NormalizedPath> = Vec::new();
233    let mut local_unresolved: Vec<String> = Vec::new();
234    let mut saw_computed = false;
235
236    for directive in &directives {
237        match &directive.kind {
238            IncludeKind::Computed(_) => {
239                saw_computed = true;
240            }
241            _ => {
242                if let Some(abs_path) = resolve_include(directive, search, file_dir) {
243                    if visited.insert(abs_path.clone()) {
244                        local_resolved.push(abs_path.clone());
245                        new_for_next.push(abs_path);
246                    }
247                } else {
248                    local_unresolved.push(directive.path.clone());
249                }
250            }
251        }
252    }
253
254    if !local_resolved.is_empty() {
255        resolved
256            .lock()
257            .expect("resolved mutex poisoned")
258            .extend(local_resolved);
259    }
260    if !local_unresolved.is_empty() {
261        unresolved
262            .lock()
263            .expect("unresolved mutex poisoned")
264            .extend(local_unresolved);
265    }
266    if saw_computed {
267        has_computed.store(true, std::sync::atomic::Ordering::Relaxed);
268    }
269
270    new_for_next
271}
272
273// ── Helpers ──────────────────────────────────────────────────────────
274
/// Splice backslash-continued physical lines into single logical lines.
///
/// A backslash immediately followed by `\n`, `\r\n`, or a lone `\r` is
/// removed together with that line ending. Any other backslash is copied
/// through unchanged.
fn join_continuations(source: &str) -> String {
    let mut joined = String::with_capacity(source.len());
    let mut remaining = source;

    while let Some(slash_at) = remaining.find('\\') {
        joined.push_str(&remaining[..slash_at]);
        let after = &remaining[slash_at + 1..];

        remaining = if let Some(tail) = after.strip_prefix("\r\n") {
            tail
        } else if let Some(tail) = after.strip_prefix('\n') {
            tail
        } else if let Some(tail) = after.strip_prefix('\r') {
            tail
        } else {
            // Not a continuation: keep the backslash itself.
            joined.push('\\');
            after
        };
    }

    joined.push_str(remaining);
    joined
}
303
/// Map each logical line index to the 1-based physical line it starts on.
///
/// Physical lines are split on `\n`. A physical line that ends in a
/// backslash (ignoring trailing `\r`) continues onto the next one, so the
/// following physical line does not start a new logical line and gets no
/// entry of its own.
fn build_line_map(source: &str) -> Vec<u32> {
    let mut starts = Vec::new();
    let mut previous_continues = false;

    for (number, physical) in (1u32..).zip(source.split('\n')) {
        if !previous_continues {
            starts.push(number);
        }
        previous_continues = physical.trim_end_matches('\r').ends_with('\\');
    }

    starts
}
322
/// Strip line comments and block comments from a line.
///
/// `in_block_comment` carries block-comment state across calls: if it is
/// `true` on entry the line starts inside a comment, and on return it tells
/// whether a block comment is still open at the end of the line.
///
/// String and character literals are copied through verbatim — they are
/// NOT removed, because `#include "foo.h"` relies on its quotes. Comment
/// markers that appear *inside* a literal closed on the same line (for
/// example `"Accept: */*"` or `"http://host"`) are not treated as comments.
/// A quote with no matching close on the line (an apostrophe in free text,
/// say) is treated as an ordinary character.
///
/// The text is copied as UTF-8 slices, so non-ASCII characters survive
/// unchanged.
fn strip_comments(line: &str, in_block_comment: &mut bool) -> String {
    /// Index of the unescaped `quote` byte that closes the literal opened
    /// at `open`, if there is one on this line.
    fn closing_quote(bytes: &[u8], open: usize, quote: u8) -> Option<usize> {
        let mut i = open + 1;
        while i < bytes.len() {
            match bytes[i] {
                // Skip the escaped byte, whatever it is.
                b'\\' => i += 2,
                b if b == quote => return Some(i),
                _ => i += 1,
            }
        }
        None
    }

    let mut result = String::with_capacity(line.len());
    let bytes = line.as_bytes();
    let len = bytes.len();
    let mut i = 0;
    // Start of the run of kept text that has not been copied out yet. All
    // slice boundaries below sit next to ASCII bytes, so they are always
    // valid UTF-8 boundaries.
    let mut keep_from = 0;

    while i < len {
        if *in_block_comment {
            match line[i..].find("*/") {
                Some(offset) => {
                    *in_block_comment = false;
                    i += offset + 2;
                    keep_from = i;
                }
                // Comment runs to the end of the line; nothing more to keep.
                None => return result,
            }
            continue;
        }

        match (bytes[i], bytes.get(i + 1).copied()) {
            // Line comment — stop processing this line.
            (b'/', Some(b'/')) => {
                result.push_str(&line[keep_from..i]);
                return result;
            }
            // Block comment start.
            (b'/', Some(b'*')) => {
                result.push_str(&line[keep_from..i]);
                *in_block_comment = true;
                i += 2;
                keep_from = i;
            }
            // Literal: jump past it so its contents are not scanned for
            // comment markers. It stays part of the kept run.
            (b'"', _) | (b'\'', _) => {
                i = match closing_quote(bytes, i, bytes[i]) {
                    Some(close) => close + 1,
                    None => i + 1,
                };
            }
            _ => i += 1,
        }
    }

    if !*in_block_comment {
        result.push_str(&line[keep_from..]);
    }
    result
}
366
367/// Parse an `#include` directive from a (comment-stripped) line.
368fn parse_include_from_line(line: &str) -> Option<IncludeDirective> {
369    let trimmed = line.trim();
370
371    // Must start with #
372    let after_hash = trimmed.strip_prefix('#')?;
373    let after_hash = after_hash.trim();
374
375    // Must be "include"
376    let after_include = after_hash.strip_prefix("include")?;
377
378    // "include" must not be part of a longer identifier.
379    if let Some(next_ch) = after_include.chars().next() {
380        if next_ch.is_alphanumeric() || next_ch == '_' {
381            return None;
382        }
383    }
384
385    let rest = after_include.trim();
386
387    if rest.is_empty() {
388        return None;
389    }
390
391    // #include "path"
392    if let Some(inner) = rest.strip_prefix('"') {
393        let end = inner.find('"')?;
394        let path = &inner[..end];
395        if path.is_empty() {
396            return None;
397        }
398        return Some(IncludeDirective {
399            kind: IncludeKind::Quoted,
400            path: path.to_string(),
401            line: 0, // Filled in by caller.
402        });
403    }
404
405    // #include <path>
406    if let Some(inner) = rest.strip_prefix('<') {
407        let end = inner.find('>')?;
408        let path = &inner[..end];
409        if path.is_empty() {
410            return None;
411        }
412        return Some(IncludeDirective {
413            kind: IncludeKind::AngleBracket,
414            path: path.to_string(),
415            line: 0,
416        });
417    }
418
419    // #include MACRO — computed include.
420    let macro_name: String = rest
421        .chars()
422        .take_while(|c| c.is_alphanumeric() || *c == '_')
423        .collect();
424    if !macro_name.is_empty() {
425        return Some(IncludeDirective {
426            kind: IncludeKind::Computed(macro_name.clone()),
427            path: macro_name,
428            line: 0,
429        });
430    }
431
432    None
433}
434
435/// Normalize a path to an absolute path (best-effort, no symlink resolution).
436fn normalize(path: &Path) -> NormalizedPath {
437    try_normalize(path).unwrap_or_else(|| path.into())
438}
439
440fn try_normalize(path: &Path) -> Option<NormalizedPath> {
441    // Use canonicalize which resolves symlinks and produces an absolute path.
442    // On Windows, canonicalize produces \\?\ extended-length paths which must
443    // be stripped to match the watcher's path format for journal lookups.
444    let p = path.canonicalize().ok()?;
445    #[cfg(windows)]
446    {
447        let s = p.to_string_lossy();
448        if let Some(stripped) = s.strip_prefix(r"\\?\") {
449            return Some(NormalizedPath::from(stripped));
450        }
451    }
452    Some(p.into())
453}
454
#[cfg(test)]
mod tests {
    // Unit tests for the include scanner. Tests that touch the filesystem
    // build their fixtures inside a `TempDir`.
    use super::*;
    use tempfile::TempDir;

    // ── scan_includes_str tests ─────────────────────────────────────

    /// A single quoted include is reported with kind, path and line 1.
    #[test]
    fn basic_quoted_include() {
        let source = r#"#include "foo.h""#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].kind, IncludeKind::Quoted);
        assert_eq!(includes[0].path, "foo.h");
        assert_eq!(includes[0].line, 1);
    }

    /// A single angle-bracket include is reported with the right kind.
    #[test]
    fn basic_angle_bracket_include() {
        let source = "#include <stdio.h>";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].kind, IncludeKind::AngleBracket);
        assert_eq!(includes[0].path, "stdio.h");
    }

    /// Several includes are returned in source order.
    #[test]
    fn multiple_includes() {
        let source = r#"
#include <stdio.h>
#include "config.h"
#include <stdlib.h>
"#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 3);
        assert_eq!(includes[0].path, "stdio.h");
        assert_eq!(includes[1].path, "config.h");
        assert_eq!(includes[2].path, "stdlib.h");
    }

    /// Directory separators inside the path are kept as written.
    #[test]
    fn include_with_path_separators() {
        let source = r#"#include "path/to/header.h""#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "path/to/header.h");
    }

    /// An identifier operand is classified as a computed include.
    #[test]
    fn computed_include() {
        let source = "#include PLATFORM_HEADER";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(
            includes[0].kind,
            IncludeKind::Computed("PLATFORM_HEADER".to_string())
        );
        assert_eq!(includes[0].path, "PLATFORM_HEADER");
    }

    /// A directive behind `//` is ignored.
    #[test]
    fn skip_line_comment() {
        let source = r#"
// #include "old.h"
#include "real.h"
"#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "real.h");
    }

    /// A directive inside a single-line `/* */` comment is ignored.
    #[test]
    fn skip_block_comment() {
        let source = r#"
/* #include "old.h" */
#include "real.h"
"#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "real.h");
    }

    /// Directives inside a block comment spanning several lines are ignored.
    #[test]
    fn skip_multiline_block_comment() {
        let source = r#"
/*
#include "old1.h"
#include "old2.h"
*/
#include "real.h"
"#;
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "real.h");
    }

    /// `#include` text inside a string literal is not a directive.
    #[test]
    fn skip_include_in_string_literal() {
        let source = "const char* s = \"#include \\\"fake.h\\\"\";\n#include \"real.h\"\n";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "real.h");
    }

    /// A backslash-newline in the middle of the keyword is spliced away.
    #[test]
    fn backslash_continuation() {
        let source = "#in\\\nclude \"continued.h\"";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "continued.h");
    }

    /// Leading whitespace before `#` is allowed.
    #[test]
    fn indented_include() {
        let source = "    #include <indented.h>";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "indented.h");
    }

    /// Whitespace between `#` and `include` is allowed.
    #[test]
    fn hash_space_include() {
        let source = "#  include <spaced.h>";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "spaced.h");
    }

    /// Other preprocessor directives produce no results.
    #[test]
    fn not_include_directive() {
        let source = "#define FOO 1\n#ifdef BAR\n#endif\n";
        let includes = scan_includes_str(source);
        assert!(includes.is_empty());
    }

    /// An include wrapped in a header guard is still found.
    #[test]
    fn include_guard_not_confused() {
        let source = "#ifndef FOO_H\n#define FOO_H\n#include \"bar.h\"\n#endif\n";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "bar.h");
    }

    /// Reported line numbers are 1-based physical source lines.
    #[test]
    fn line_numbers_are_correct() {
        let source = "// preamble\n\n#include \"a.h\"\n\n#include <b.h>\n";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 2);
        assert_eq!(includes[0].line, 3);
        assert_eq!(includes[1].line, 5);
    }

    /// Empty input yields no directives.
    #[test]
    fn empty_source() {
        let includes = scan_includes_str("");
        assert!(includes.is_empty());
    }

    /// An include that follows ordinary code is still found.
    #[test]
    fn include_after_code() {
        let source = "int x = 1;\n#include \"late.h\"\n";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "late.h");
    }

    /// A directive preceded only by a closed block comment is recognised.
    #[test]
    fn block_comment_ending_on_include_line() {
        let source = "/* comment */ #include \"after.h\"";
        let includes = scan_includes_str(source);
        assert_eq!(includes.len(), 1);
        assert_eq!(includes[0].path, "after.h");
    }

    // ── resolve_include tests ────────────────────────────────────────

    /// A quoted include resolves next to the including file.
    #[test]
    fn resolve_quoted_in_file_dir() {
        let dir = TempDir::new().unwrap();
        let header = dir.path().join("local.h");
        std::fs::write(&header, "// header").unwrap();

        let directive = IncludeDirective {
            kind: IncludeKind::Quoted,
            path: "local.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths::default();
        let result = resolve_include(&directive, &search, dir.path());
        assert!(result.is_some());
        assert_eq!(result.unwrap(), normalize(&header));
    }

    /// A quoted include falls back to the `iquote` directories.
    #[test]
    fn resolve_quoted_in_iquote_dir() {
        let dir = TempDir::new().unwrap();
        let iquote_dir = dir.path().join("iquote");
        std::fs::create_dir(&iquote_dir).unwrap();
        let header = iquote_dir.join("q.h");
        std::fs::write(&header, "// header").unwrap();

        let directive = IncludeDirective {
            kind: IncludeKind::Quoted,
            path: "q.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths {
            iquote: vec![iquote_dir.into()],
            ..Default::default()
        };
        // Not in the including file's dir — should find via iquote.
        let other_dir = dir.path().join("other");
        std::fs::create_dir(&other_dir).unwrap();
        let result = resolve_include(&directive, &search, &other_dir);
        assert!(result.is_some());
        assert_eq!(result.unwrap(), normalize(&header));
    }

    /// An angle-bracket include resolves through the `user` directories.
    #[test]
    fn resolve_angle_bracket_in_user_dir() {
        let dir = TempDir::new().unwrap();
        let inc = dir.path().join("inc");
        std::fs::create_dir(&inc).unwrap();
        let header = inc.join("sys.h");
        std::fs::write(&header, "// header").unwrap();

        let directive = IncludeDirective {
            kind: IncludeKind::AngleBracket,
            path: "sys.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths {
            user: vec![inc.into()],
            ..Default::default()
        };
        let result = resolve_include(&directive, &search, dir.path());
        assert!(result.is_some());
    }

    /// Angle-bracket includes must not consult the `iquote` directories.
    #[test]
    fn resolve_angle_bracket_skips_iquote() {
        let dir = TempDir::new().unwrap();
        let iquote_dir = dir.path().join("iquote");
        std::fs::create_dir(&iquote_dir).unwrap();
        let header = iquote_dir.join("only_iquote.h");
        std::fs::write(&header, "// header").unwrap();

        let directive = IncludeDirective {
            kind: IncludeKind::AngleBracket,
            path: "only_iquote.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths {
            iquote: vec![iquote_dir.into()],
            ..Default::default()
        };
        let result = resolve_include(&directive, &search, dir.path());
        assert!(result.is_none(), "angle bracket should not search iquote");
    }

    /// A header that exists nowhere resolves to `None`.
    #[test]
    fn resolve_unresolved_returns_none() {
        let directive = IncludeDirective {
            kind: IncludeKind::Quoted,
            path: "nonexistent.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths::default();
        let result = resolve_include(&directive, &search, Path::new("/tmp"));
        assert!(result.is_none());
    }

    /// Computed includes are never resolved.
    #[test]
    fn resolve_computed_returns_none() {
        let directive = IncludeDirective {
            kind: IncludeKind::Computed("MACRO".to_string()),
            path: "MACRO".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths::default();
        let result = resolve_include(&directive, &search, Path::new("/tmp"));
        assert!(result.is_none());
    }

    /// When a header exists in both, the `user` directory wins over `system`.
    #[test]
    fn resolve_search_order_user_before_system() {
        let dir = TempDir::new().unwrap();
        let user_dir = dir.path().join("user");
        let sys_dir = dir.path().join("sys");
        std::fs::create_dir(&user_dir).unwrap();
        std::fs::create_dir(&sys_dir).unwrap();

        let user_header = user_dir.join("shared.h");
        let sys_header = sys_dir.join("shared.h");
        std::fs::write(&user_header, "// user").unwrap();
        std::fs::write(&sys_header, "// system").unwrap();

        let directive = IncludeDirective {
            kind: IncludeKind::AngleBracket,
            path: "shared.h".to_string(),
            line: 1,
        };
        let search = IncludeSearchPaths {
            user: vec![user_dir.into()],
            system: vec![sys_dir.into()],
            ..Default::default()
        };
        let result = resolve_include(&directive, &search, dir.path()).unwrap();
        assert_eq!(result, normalize(&user_header));
    }

    // ── scan_recursive tests ─────────────────────────────────────────

    /// Headers reached only through another header are discovered.
    #[test]
    fn recursive_scan_finds_transitive_includes() {
        let dir = TempDir::new().unwrap();

        // main.c -> a.h -> b.h
        std::fs::write(dir.path().join("main.c"), "#include \"a.h\"\n").unwrap();
        std::fs::write(dir.path().join("a.h"), "#include \"b.h\"\n").unwrap();
        std::fs::write(dir.path().join("b.h"), "// leaf\n").unwrap();

        let search = IncludeSearchPaths::default();
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        assert_eq!(result.resolved.len(), 2);
        assert!(result
            .resolved
            .contains(&normalize(&dir.path().join("a.h"))));
        assert!(result
            .resolved
            .contains(&normalize(&dir.path().join("b.h"))));
        assert!(result.unresolved.is_empty());
        assert!(!result.has_computed);
    }

    /// A circular include chain terminates and lists each header once.
    #[test]
    fn recursive_scan_handles_cycles() {
        let dir = TempDir::new().unwrap();

        // a.h -> b.h -> a.h (cycle)
        std::fs::write(dir.path().join("main.c"), "#include \"a.h\"\n").unwrap();
        std::fs::write(dir.path().join("a.h"), "#include \"b.h\"\n").unwrap();
        std::fs::write(dir.path().join("b.h"), "#include \"a.h\"\n").unwrap();

        let search = IncludeSearchPaths::default();
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        // Should find both a.h and b.h without infinite loop.
        assert_eq!(result.resolved.len(), 2);
    }

    /// Includes that cannot be found are reported by their written name.
    #[test]
    fn recursive_scan_records_unresolved() {
        let dir = TempDir::new().unwrap();

        std::fs::write(
            dir.path().join("main.c"),
            "#include \"exists.h\"\n#include <missing.h>\n",
        )
        .unwrap();
        std::fs::write(dir.path().join("exists.h"), "// ok\n").unwrap();

        let search = IncludeSearchPaths::default();
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        assert_eq!(result.resolved.len(), 1);
        assert_eq!(result.unresolved, vec!["missing.h"]);
    }

    /// A computed include sets `has_computed` without blocking other includes.
    #[test]
    fn recursive_scan_detects_computed_includes() {
        let dir = TempDir::new().unwrap();

        std::fs::write(
            dir.path().join("main.c"),
            "#include PLATFORM_HEADER\n#include \"normal.h\"\n",
        )
        .unwrap();
        std::fs::write(dir.path().join("normal.h"), "// ok\n").unwrap();

        let search = IncludeSearchPaths::default();
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        assert!(result.has_computed);
        assert_eq!(result.resolved.len(), 1);
    }

    /// A header shared by two includers (diamond) is listed only once.
    #[test]
    fn recursive_scan_deduplicates() {
        let dir = TempDir::new().unwrap();

        // main.c includes a.h and b.h, both include common.h
        std::fs::write(
            dir.path().join("main.c"),
            "#include \"a.h\"\n#include \"b.h\"\n",
        )
        .unwrap();
        std::fs::write(dir.path().join("a.h"), "#include \"common.h\"\n").unwrap();
        std::fs::write(dir.path().join("b.h"), "#include \"common.h\"\n").unwrap();
        std::fs::write(dir.path().join("common.h"), "// shared\n").unwrap();

        let search = IncludeSearchPaths::default();
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        // a.h, b.h, common.h — each once.
        assert_eq!(result.resolved.len(), 3);
    }

    /// Search paths are honoured, and a found header's own quoted includes
    /// resolve relative to that header's directory.
    #[test]
    fn recursive_scan_with_search_paths() {
        let dir = TempDir::new().unwrap();
        let inc = dir.path().join("inc");
        std::fs::create_dir(&inc).unwrap();

        std::fs::write(dir.path().join("main.c"), "#include <lib.h>\n").unwrap();
        std::fs::write(inc.join("lib.h"), "#include \"detail.h\"\n").unwrap();
        std::fs::write(inc.join("detail.h"), "// impl\n").unwrap();

        let search = IncludeSearchPaths {
            user: vec![inc.clone().into()],
            ..Default::default()
        };
        let result = scan_recursive(&dir.path().join("main.c"), &search);

        assert_eq!(result.resolved.len(), 2);
        assert!(result.resolved.contains(&normalize(&inc.join("lib.h"))));
        assert!(result.resolved.contains(&normalize(&inc.join("detail.h"))));
    }

    // ── Helper function tests ────────────────────────────────────────

    /// Both LF and CRLF continuations are removed.
    #[test]
    fn join_continuations_merges_lines() {
        assert_eq!(join_continuations("a\\\nb"), "ab");
        assert_eq!(join_continuations("a\\\r\nb"), "ab");
    }

    /// Ordinary newlines are left alone.
    #[test]
    fn join_continuations_preserves_normal_lines() {
        assert_eq!(join_continuations("a\nb"), "a\nb");
    }

    /// Text after `//` is dropped and no block-comment state is set.
    #[test]
    fn strip_comments_handles_line_comment() {
        let mut in_block = false;
        let result = strip_comments("code // comment", &mut in_block);
        assert_eq!(result, "code ");
        assert!(!in_block);
    }

    /// A closed block comment is cut out; text on both sides is kept.
    #[test]
    fn strip_comments_handles_block_comment() {
        let mut in_block = false;
        let result = strip_comments("before /* inside */ after", &mut in_block);
        assert_eq!(result, "before  after");
        assert!(!in_block);
    }

    /// An unclosed block comment leaves the state flag set for the next line.
    #[test]
    fn strip_comments_handles_unterminated_block() {
        let mut in_block = false;
        let result = strip_comments("code /* start", &mut in_block);
        assert_eq!(result, "code ");
        assert!(in_block);
    }

    /// String literals pass through unchanged.
    #[test]
    fn strip_comments_preserves_string_literal() {
        let mut in_block = false;
        let result = strip_comments(r#"x = "hello""#, &mut in_block);
        assert_eq!(result, r#"x = "hello""#);
    }
}