// tsz_cli/driver_sources.rs

//! Source file I/O, config helpers, and file reading for the compilation driver.
2
3use super::*;
4
/// Outcome of loading one source file from disk.
///
/// Every variant carries a `String`: the decoded contents for `Text` and `Binary`,
/// or the I/O error message for `Error`.
#[derive(Clone, Debug)]
pub enum FileReadResult {
    /// The file decoded cleanly as UTF-8; holds its full contents.
    Text(String),
    /// The file looks binary (reported as TS1490). Holds a lossy UTF-8 decoding of the
    /// bytes so the parser can still produce best-effort diagnostics.
    Binary(String),
    /// The file could not be read; holds the rendered I/O error.
    Error(String),
}
15
16/// Read a source file, detecting binary files that should emit TS1490.
17///
18/// TypeScript detects binary files by checking for:
19/// - UTF-16 BOM (FE FF for BE, FF FE for LE)
20/// - Non-valid UTF-8 sequences
21/// - Many control bytes (not expected in source files)
22/// - Files with many null bytes
23pub fn read_source_file(path: &Path) -> FileReadResult {
24    // Read as bytes first
25    let bytes = match std::fs::read(path) {
26        Ok(b) => b,
27        Err(e) => return FileReadResult::Error(e.to_string()),
28    };
29
30    // Check for binary indicators
31    if is_binary_file(&bytes) {
32        return FileReadResult::Binary(String::from_utf8_lossy(&bytes).to_string());
33    }
34
35    // Try to decode as UTF-8
36    match String::from_utf8(bytes) {
37        Ok(text) => FileReadResult::Text(text),
38        Err(err) => FileReadResult::Binary(String::from_utf8_lossy(err.as_bytes()).to_string()),
39    }
40}
41
42/// Check if file content appears to be binary (not valid source code).
43///
44/// Matches TypeScript's binary detection:
45/// - UTF-16 BOM at start
46/// - Many consecutive null bytes (embedded binaries, corrupted files)
47/// - Repeated control bytes in first 1024 bytes
48pub(super) fn is_binary_file(bytes: &[u8]) -> bool {
49    if bytes.is_empty() {
50        return false;
51    }
52
53    // Check for UTF-16 BOM
54    // UTF-16 BE: FE FF
55    // UTF-16 LE: FF FE
56    if bytes.len() >= 2
57        && ((bytes[0] == 0xFE && bytes[1] == 0xFF) || (bytes[0] == 0xFF && bytes[1] == 0xFE))
58    {
59        return true;
60    }
61
62    // Check for many null bytes (binary file indicator)
63    // TypeScript considers files with many nulls as binary
64    let null_count = bytes.iter().take(1024).filter(|&&b| b == 0).count();
65    if null_count > 10 {
66        return true;
67    }
68
69    // Check for consecutive null bytes (UTF-16 or binary)
70    // UTF-16 text will have null bytes between ASCII characters
71    let mut consecutive_nulls = 0;
72    for &byte in bytes.iter().take(512) {
73        if byte == 0 {
74            consecutive_nulls += 1;
75            if consecutive_nulls >= 4 {
76                return true;
77            }
78        } else {
79            consecutive_nulls = 0;
80        }
81    }
82
83    // Check for non-whitespace control bytes (e.g. U+0000/Control-Range from garbled UTF-16 read as UTF-8)
84    let control_count = bytes
85        .iter()
86        .take(1024)
87        .filter(|&&b| {
88            b < 0x20 && b != b'\t' && b != b'\n' && b != b'\r' && b != b'\x0C' && b != b'\x0B'
89        })
90        .count();
91    if control_count >= 4 {
92        return true;
93    }
94
95    false
96}
97
98#[derive(Debug, Clone)]
99pub(super) struct SourceEntry {
100    pub(super) path: PathBuf,
101    pub(super) text: Option<String>,
102    /// If true, this file appears to be binary (emit TS1490)
103    pub(super) is_binary: bool,
104}
105
106pub(super) fn sources_have_no_default_lib(sources: &[SourceEntry]) -> bool {
107    sources.iter().any(source_has_no_default_lib)
108}
109
110pub(super) fn source_has_no_default_lib(source: &SourceEntry) -> bool {
111    if let Some(text) = source.text.as_deref() {
112        return has_no_default_lib_directive(text);
113    }
114    let Ok(text) = std::fs::read_to_string(&source.path) else {
115        return false;
116    };
117    has_no_default_lib_directive(&text)
118}
119
120pub(super) fn has_no_default_lib_directive(source: &str) -> bool {
121    for line in source.lines() {
122        let trimmed = line.trim_start();
123        if !trimmed.starts_with("///") {
124            if trimmed.is_empty() {
125                continue;
126            }
127            break;
128        }
129        if let Some(true) = parse_reference_no_default_lib_value(trimmed) {
130            return true;
131        }
132    }
133    false
134}
135
136pub(super) fn sources_have_no_types_and_symbols(sources: &[SourceEntry]) -> bool {
137    sources.iter().any(source_has_no_types_and_symbols)
138}
139
140pub(super) fn source_has_no_types_and_symbols(source: &SourceEntry) -> bool {
141    if let Some(text) = source.text.as_deref() {
142        return has_no_types_and_symbols_directive(text);
143    }
144    let Ok(text) = std::fs::read_to_string(&source.path) else {
145        return false;
146    };
147    has_no_types_and_symbols_directive(&text)
148}
149
/// Returns `true` if one of the first 32 lines of `source` is a `//` comment containing
/// `@noTypesAndSymbols: true`.
///
/// The directive name and the value are matched ASCII case-insensitively. A comment that
/// mentions the directive without a following `:` is ignored and the scan continues. The
/// first comment that does have a `:` decides the result: its value is the first token
/// after the colon, with tokens separated by whitespace, `,` or `;`.
pub(crate) fn has_no_types_and_symbols_directive(source: &str) -> bool {
    const DIRECTIVE: &str = "@notypesandsymbols";
    const MAX_LINES: usize = 32;

    source
        .lines()
        .take(MAX_LINES)
        .map(str::trim_start)
        .filter(|line| line.starts_with("//"))
        .find_map(|comment| {
            // ASCII lowercasing keeps byte offsets, so the index is valid in `comment`.
            let at = comment.to_ascii_lowercase().find(DIRECTIVE)?;
            let after_name = comment[at + DIRECTIVE.len()..].trim_start();
            let raw_value = after_name.strip_prefix(':')?.trim_start();
            let first_token = raw_value
                .split(|c: char| c == ',' || c == ';' || c.is_whitespace())
                .find(|token| !token.is_empty())
                .unwrap_or("");
            Some(first_token.eq_ignore_ascii_case("true"))
        })
        .unwrap_or(false)
}
177
178pub(super) fn parse_reference_no_default_lib_value(line: &str) -> Option<bool> {
179    let needle = "no-default-lib";
180    let lower = line.to_ascii_lowercase();
181    let idx = lower.find(needle)?;
182    let mut rest = &line[idx + needle.len()..];
183    rest = rest.trim_start();
184    if !rest.starts_with('=') {
185        return None;
186    }
187    rest = rest[1..].trim_start();
188    let quote = rest.as_bytes().first().copied()?;
189    if quote != b'"' && quote != b'\'' {
190        return None;
191    }
192    let rest = &rest[1..];
193    let end = rest.find(quote as char)?;
194    let value = rest[..end].trim();
195    match value.to_ascii_lowercase().as_str() {
196        "true" => Some(true),
197        "false" => Some(false),
198        _ => None,
199    }
200}
201
202pub(super) struct SourceReadResult {
203    pub(super) sources: Vec<SourceEntry>,
204    pub(super) dependencies: FxHashMap<PathBuf, FxHashSet<PathBuf>>,
205}
206
207pub(crate) fn find_tsconfig(cwd: &Path) -> Option<PathBuf> {
208    let candidate = cwd.join("tsconfig.json");
209    candidate
210        .is_file()
211        .then(|| canonicalize_or_owned(&candidate))
212}
213
214pub(crate) fn resolve_tsconfig_path(cwd: &Path, project: Option<&Path>) -> Result<Option<PathBuf>> {
215    let Some(project) = project else {
216        return Ok(find_tsconfig(cwd));
217    };
218
219    let mut candidate = if project.is_absolute() {
220        project.to_path_buf()
221    } else {
222        cwd.join(project)
223    };
224
225    if candidate.is_dir() {
226        candidate = candidate.join("tsconfig.json");
227    }
228
229    if !candidate.exists() {
230        bail!("tsconfig not found at {}", candidate.display());
231    }
232
233    if !candidate.is_file() {
234        bail!("project path is not a file: {}", candidate.display());
235    }
236
237    Ok(Some(canonicalize_or_owned(&candidate)))
238}
239
240pub(crate) fn load_config(path: Option<&Path>) -> Result<Option<TsConfig>> {
241    let Some(path) = path else {
242        return Ok(None);
243    };
244
245    let config = load_tsconfig(path)?;
246    Ok(Some(config))
247}
248
/// Returns the directory that relative config paths are resolved against.
///
/// This is the parent directory of `tsconfig_path` when one is given and has a parent;
/// otherwise it is `cwd`.
pub(crate) fn config_base_dir(cwd: &Path, tsconfig_path: Option<&Path>) -> PathBuf {
    match tsconfig_path.and_then(|config| config.parent()) {
        Some(config_dir) => config_dir.to_path_buf(),
        None => cwd.to_path_buf(),
    }
}
254
255pub(super) fn build_discovery_options(
256    args: &CliArgs,
257    base_dir: &Path,
258    tsconfig_path: Option<&Path>,
259    config: Option<&TsConfig>,
260    out_dir: Option<&Path>,
261    resolved: &ResolvedCompilerOptions,
262) -> Result<FileDiscoveryOptions> {
263    let follow_links = env_flag("TSZ_FOLLOW_SYMLINKS");
264    if !args.files.is_empty() {
265        return Ok(FileDiscoveryOptions {
266            base_dir: base_dir.to_path_buf(),
267            files: args.files.clone(),
268            include: None,
269            exclude: None,
270            out_dir: out_dir.map(Path::to_path_buf),
271            follow_links,
272            allow_js: resolved.allow_js,
273        });
274    }
275
276    let Some(config) = config else {
277        bail!("no input files specified and no tsconfig.json found");
278    };
279    let Some(tsconfig_path) = tsconfig_path else {
280        bail!("no tsconfig.json path available");
281    };
282
283    let mut options = FileDiscoveryOptions::from_tsconfig(tsconfig_path, config, out_dir);
284    options.follow_links = follow_links;
285    options.allow_js = resolved.allow_js;
286    Ok(options)
287}
288
289pub(super) fn collect_type_root_files(
290    base_dir: &Path,
291    options: &ResolvedCompilerOptions,
292) -> Vec<PathBuf> {
293    let roots = match options.type_roots.as_ref() {
294        Some(roots) => roots.clone(),
295        None => default_type_roots(base_dir),
296    };
297    if roots.is_empty() {
298        return Vec::new();
299    }
300
301    let mut files = std::collections::BTreeSet::new();
302    if let Some(types) = options.types.as_ref() {
303        for name in types {
304            if let Some(entry) = resolve_type_package_from_roots(name, &roots, options) {
305                files.insert(entry);
306            }
307        }
308        return files.into_iter().collect();
309    }
310
311    for root in roots {
312        for package_root in collect_type_packages_from_root(&root) {
313            if let Some(entry) = resolve_type_package_entry(&package_root, options) {
314                files.insert(entry);
315            }
316        }
317    }
318
319    files.into_iter().collect()
320}
321
322pub(super) fn read_source_files(
323    paths: &[PathBuf],
324    base_dir: &Path,
325    options: &ResolvedCompilerOptions,
326    cache: Option<&CompilationCache>,
327    changed_paths: Option<&FxHashSet<PathBuf>>,
328) -> Result<SourceReadResult> {
329    let mut sources: FxHashMap<PathBuf, (Option<String>, bool)> = FxHashMap::default(); // (text, is_binary)
330    let mut dependencies: FxHashMap<PathBuf, FxHashSet<PathBuf>> = FxHashMap::default();
331    let mut seen = FxHashSet::default();
332    let mut pending = VecDeque::new();
333    let mut resolution_cache = ModuleResolutionCache::default();
334    let use_cache = cache.is_some() && changed_paths.is_some();
335
336    for path in paths {
337        let canonical = canonicalize_or_owned(path);
338        if seen.insert(canonical.clone()) {
339            pending.push_back(canonical);
340        }
341    }
342
343    while let Some(path) = pending.pop_front() {
344        // Use cached bind result only when we know the file hasn't changed
345        // (changed_paths is provided and this file is not in it)
346        if use_cache
347            && let Some(cache) = cache
348            && let Some(changed_paths) = changed_paths
349            && !changed_paths.contains(&path)
350            && let (Some(_), Some(cached_deps)) =
351                (cache.bind_cache.get(&path), cache.dependencies.get(&path))
352        {
353            dependencies.insert(path.clone(), cached_deps.clone());
354            sources.insert(path.clone(), (None, false)); // Cached files are not binary
355            for dep in cached_deps {
356                if seen.insert(dep.clone()) {
357                    pending.push_back(dep.clone());
358                }
359            }
360            continue;
361        }
362
363        // Read file with binary detection
364        let (text, is_binary) = match read_source_file(&path) {
365            FileReadResult::Text(t) => (t, false),
366            FileReadResult::Binary(text) => (text, true),
367            FileReadResult::Error(e) => {
368                return Err(anyhow::anyhow!("failed to read {}: {}", path.display(), e));
369            }
370        };
371        let (specifiers, type_refs) = if is_binary {
372            (vec![], vec![])
373        } else {
374            (
375                collect_module_specifiers_from_text(&path, &text),
376                tsz::checker::triple_slash_validator::extract_reference_types(&text),
377            )
378        };
379        let reference_paths = if is_binary || options.no_resolve {
380            vec![]
381        } else {
382            tsz::checker::triple_slash_validator::extract_reference_paths(&text)
383        };
384
385        sources.insert(path.clone(), (Some(text), is_binary));
386        let entry = dependencies.entry(path.clone()).or_default();
387
388        if !options.no_resolve {
389            for specifier in specifiers {
390                if let Some(resolved) = resolve_module_specifier(
391                    &path,
392                    &specifier,
393                    options,
394                    base_dir,
395                    &mut resolution_cache,
396                    &seen,
397                ) {
398                    let canonical = canonicalize_or_owned(&resolved);
399                    entry.insert(canonical.clone());
400                    if seen.insert(canonical.clone()) {
401                        pending.push_back(canonical);
402                    }
403                }
404            }
405        }
406
407        // Resolve /// <reference types="..." /> directives
408        if !type_refs.is_empty() && !options.no_resolve {
409            let type_roots = options
410                .type_roots
411                .clone()
412                .unwrap_or_else(|| default_type_roots(base_dir));
413            for (type_name, resolution_mode, _line) in type_refs {
414                let resolved =
415                    if let Some(ref mode) = resolution_mode {
416                        // With explicit resolution-mode, use exports map with the specified condition
417                        let candidates =
418                            crate::driver_resolution::type_package_candidates_pub(&type_name);
419                        let mut result = None;
420                        for root in &type_roots {
421                            for candidate in &candidates {
422                                let package_root = root.join(candidate);
423                                if package_root.is_dir()
424                                    && let Some(entry) =
425                                    crate::driver_resolution::resolve_type_package_entry_with_mode(
426                                        &package_root, mode, options,
427                                    )
428                                {
429                                    result = Some(entry);
430                                    break;
431                                }
432                            }
433                            if result.is_some() {
434                                break;
435                            }
436                        }
437                        result
438                    } else {
439                        resolve_type_package_from_roots(&type_name, &type_roots, options)
440                    };
441                if let Some(resolved) = resolved {
442                    let canonical = canonicalize_or_owned(&resolved);
443                    entry.insert(canonical.clone());
444                    if seen.insert(canonical.clone()) {
445                        pending.push_back(canonical);
446                    }
447                }
448            }
449        }
450
451        // Resolve /// <reference path="..." /> directives
452        if !reference_paths.is_empty() {
453            let base_dir = path.parent().unwrap_or_else(|| Path::new(""));
454            for (reference_path, _line_num, _quote_offset) in reference_paths {
455                if reference_path.is_empty() {
456                    continue;
457                }
458                let mut candidates = Vec::new();
459                let direct_reference = base_dir.join(&reference_path);
460                candidates.push(direct_reference);
461                if !reference_path.contains('.') {
462                    for ext in [".ts", ".tsx", ".d.ts"] {
463                        candidates.push(base_dir.join(format!("{reference_path}{ext}")));
464                    }
465                }
466
467                let Some(resolved_reference) = candidates
468                    .iter()
469                    .find(|candidate| candidate.is_file())
470                    .map(|candidate| canonicalize_or_owned(candidate))
471                else {
472                    continue;
473                };
474                entry.insert(resolved_reference.clone());
475                if seen.insert(resolved_reference.clone()) {
476                    pending.push_back(resolved_reference);
477                }
478            }
479        }
480    }
481
482    let mut list: Vec<SourceEntry> = sources
483        .into_iter()
484        .map(|(path, (text, is_binary))| SourceEntry {
485            path,
486            text,
487            is_binary,
488        })
489        .collect();
490    list.sort_by(|left, right| {
491        left.path
492            .to_string_lossy()
493            .cmp(&right.path.to_string_lossy())
494    });
495    Ok(SourceReadResult {
496        sources: list,
497        dependencies,
498    })
499}